In [None]:
import logging
import dspy 
import os
import mlflow
from dotenv import load_dotenv

# Notebook-level logger, reporting at INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Per-library verbosity, applied in one pass: one (logger name, level) pair per
# third-party logger this notebook adjusts.
for _logger_name, _level in (
    ("LiteLLM", logging.WARNING),
    ("httpx", logging.WARNING),
    ("urllib3", logging.ERROR),
    ("dspy", logging.INFO),
):
    logging.getLogger(_logger_name).setLevel(_level)


# Send runs to the tracking server and file them under this notebook's experiment.
mlflow.set_tracking_uri("http://mlflow:8080/")
mlflow.set_experiment("deploy_dspy_program")

# DSPy autologging switches, kept together so they are easy to toggle.
autolog_options = {
    "log_compiles": True,             # Track optimization process
    "log_evals": True,                # Track evaluation results
    "log_traces_from_compile": True,  # Track program traces during optimization
}
mlflow.dspy.autolog(**autolog_options)

# Load settings from a .env file; kept as the last expression so the cell
# still echoes the call's return value.
load_dotenv()

True

In [35]:
# Endpoint, credentials and model id are read from the environment, so no
# secrets are written into the notebook itself.
LLM_URL = os.getenv('LLM_URL')
API_KEY = os.getenv('API_KEY')
LLM_MODEL = os.getenv('LLM_MODEL')

# os.getenv returns None for a missing variable. Stop here with a readable
# message instead of letting dspy.LM fail later on a None model name.
# LLM_URL and API_KEY are left optional on purpose.
if not LLM_MODEL:
    raise RuntimeError(
        "LLM_MODEL is not set; define it in the environment or in the .env file."
    )

dspy.enable_logging()
lm = dspy.LM(
    LLM_MODEL,
    api_base=LLM_URL,  # ensure this points to your port
    api_key=API_KEY,
    model_type='chat',
)

# Register the LM as the default and enable usage tracking in a single call
# (dspy.configure is the same entry point as dspy.settings.configure).
dspy.configure(lm=lm, track_usage=True)

In [36]:
# Chain-of-thought predictor with a typed signature: `answer` is declared as a float.
math = dspy.ChainOfThought("question -> answer: float")

dice_question = "Two dice are tossed. What is the probability that the sum equals two?"
x = math(question=dice_question)  # `x` is read again by the usage cell that follows
print(x)


Prediction(
    reasoning='When two dice are tossed, each die has 6 faces, numbered from 1 to 6. The total number of possible outcomes when two dice are rolled is \\(6 \\times 6 = 36\\).\n\nTo find the probability that the sum of the numbers on the two dice equals 2, we need to determine how many outcomes result in this sum. The only way to achieve a sum of 2 is if both dice show a 1. Therefore, there is only 1 favorable outcome: (1, 1).\n\nThe probability of an event is calculated as the number of favorable outcomes divided by the total number of possible outcomes. Thus, the probability that the sum equals 2 is:\n\n\\[\n\\frac{\\text{Number of favorable outcomes}}{\\text{Total number of possible outcomes}} = \\frac{1}{36}\n\\]\n\nConverting this fraction to a float gives us approximately 0.027777777777777776.',
    answer=0.027777777777777776
)


In [None]:
# Show the LM usage attached to the prediction `x` produced by the previous cell.
print(x.get_lm_usage())

In [None]:
def evaluate_math(expression: str):
    """Evaluates a Python math expression and returns its result.

    Args:
        expression: Source text to run in DSPy's Python interpreter.

    Returns:
        Whatever `dspy.PythonInterpreter.execute` returns for `expression`.
    """
    # This function is handed to dspy.ReAct as a tool; the docstring gives the
    # agent a description of what the tool is for.
    interpreter = dspy.PythonInterpreter({})
    return interpreter.execute(expression)

def search_wikipedia(query: str) -> list[str]:
    """Retrieves abstracts from Wikipedia.

    Args:
        query: Free-text search query.

    Returns:
        The `text` field of each of the top 3 retrieved results.
    """
    # Used as a dspy.ReAct tool: the docstring and annotations describe it to the agent.
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    return [passage['text'] for passage in retriever(query, k=3)]

# ReAct agent that can call both tools while working towards a float answer.
react = dspy.ReAct("question -> answer: float", tools=[evaluate_math, search_wikipedia])

birth_year_question = "What is 9362158 divided by the year of birth of David Gregory of Kinnairdy castle?"
pred = react(question=birth_year_question)
print(pred.answer)

# Print the most recent LM calls (up to 50) to inspect the agent's steps.
dspy.inspect_history(n=50)

In [None]:
# Signature for the outlining step: a topic goes in; a title, a list of
# sections and a per-section list of subheadings come out.
# NOTE(review): the class docstring is presumably used by DSPy as the task
# instruction, so it is left verbatim.
class Outline(dspy.Signature):
    """Outline a thorough overview of a topic."""

    # Input: the subject to outline.
    topic: str = dspy.InputField()
    # Outputs: article title, ordered section headings, and the subheadings
    # keyed by section heading.
    title: str = dspy.OutputField()
    sections: list[str] = dspy.OutputField()
    section_subheadings: dict[str, list[str]] = dspy.OutputField(desc="mapping from section headings to subheadings")

# Signature for the drafting step: one section heading plus its subheadings
# go in; the drafted section text comes out.
# NOTE(review): the class docstring is presumably used by DSPy as the task
# instruction, so it is left verbatim.
class DraftSection(dspy.Signature):
    """Draft a top-level section of an article."""

    # Inputs: overall topic, the heading of the section to write, and the
    # subheadings that section should cover.
    topic: str = dspy.InputField()
    section_heading: str = dspy.InputField()
    section_subheadings: list[str] = dspy.InputField()
    # Output: the section body, requested as markdown via `desc`.
    content: str = dspy.OutputField(desc="markdown-formatted section")

class DraftArticle(dspy.Module):
    """Two-stage article writer: build an outline, then draft each section.

    Both stages are `dspy.ChainOfThought` predictors, one over the `Outline`
    signature and one over `DraftSection`.
    """

    def __init__(self):
        # Initialise the dspy.Module base class before attaching sub-modules.
        super().__init__()
        self.build_outline = dspy.ChainOfThought(Outline)
        self.draft_section = dspy.ChainOfThought(DraftSection)

    def forward(self, topic):
        """Draft an article about `topic`.

        Args:
            topic: Subject passed to the outlining step.

        Returns:
            A `dspy.Prediction` with `title` (taken from the outline) and
            `sections` (the drafted content of each section, in the iteration
            order of the outline's `section_subheadings` mapping).
        """
        outline = self.build_outline(topic=topic)
        sections = []
        for heading, subheadings in outline.section_subheadings.items():
            # Headings are passed to the drafting step as markdown (## / ###).
            section_heading = f"## {heading}"
            section_subheadings = [f"### {subheading}" for subheading in subheadings]
            # The outline's title, not the raw topic, is what the drafting step sees.
            drafted = self.draft_section(
                topic=outline.title,
                section_heading=section_heading,
                section_subheadings=section_subheadings,
            )
            sections.append(drafted.content)
        return dspy.Prediction(title=outline.title, sections=sections)

# Build the pipeline and draft an article on a sample topic.
draft_article = DraftArticle()
article = draft_article(topic="World War 2")
# Show the LM usage attached to the returned prediction.
print(article.get_lm_usage())

In [None]:
# Print up to the last 500 recorded LM calls.
dspy.inspect_history(n=500)

In [None]:
# Show the drafted article prediction (title and sections).
print(article)

In [None]:
# Signature for structured extraction: free text goes in; a title, a list of
# headings and a list of entity records come out.
# NOTE(review): the class docstring is presumably used by DSPy as the task
# instruction, so it is left verbatim.
class ExtractInfo(dspy.Signature):
    """Extract structured information from text."""

    # Input: the raw text to analyse.
    text: str = dspy.InputField()
    # Outputs: title, headings, and entities as string-to-string dicts.
    title: str = dspy.OutputField()
    headings: list[str] = dspy.OutputField()
    entities: list[dict[str, str]] = dspy.OutputField(desc="a list of entities and their metadata")

# Plain predictor (no chain-of-thought) over the ExtractInfo signature.
module = dspy.Predict(ExtractInfo)

# Adjacent string literals are joined with nothing in between, so the first
# literal ends with a space to keep the two sentences apart.
text = (
    "Apple Inc. announced its latest iPhone 14 today. "
    "The CEO, Tim Cook, highlighted its new features in a press release."
)
response = module(text=text)

# Show each extracted field, then the underlying LM calls (up to 80).
print(response.title)
print(response.headings)
print(response.entities)
dspy.inspect_history(n=80)

In [None]:
def search_wikipedia(query: str) -> list[str]:
    """Return the text of the top three ColBERTv2 results for `query`.

    Args:
        query: Free-text search query.

    Returns:
        A list with the `text` field of each retrieved result.
    """
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    passages = []
    for hit in retriever(query, k=3):
        passages.append(hit['text'])
    return passages

# Minimal retrieval-augmented predictor: retrieved passages are supplied as `context`.
rag = dspy.ChainOfThought('context, question -> response')

question = "What's the name of the castle that David Gregory inherited?"
retrieved_passages = search_wikipedia(question)
# Left as the last expression so the notebook displays the prediction.
rag(context=retrieved_passages, question=question)

In [None]:
# Print up to the last 80 recorded LM calls.
dspy.inspect_history(n=80)

In [None]:
# Signature for a faithfulness check: a trusted context and a candidate text
# go in; a boolean verdict and supporting evidence come out.
# NOTE(review): the class docstring is presumably used by DSPy as the task
# instruction, so it is left verbatim.
class CheckCitationFaithfulness(dspy.Signature):
    """Verify that the text is based on the provided context."""

    # Inputs: the reference context (its `desc` tells the model to treat it
    # as true) and the text to check against it.
    context: str = dspy.InputField(desc="facts here are assumed to be true")
    text: str = dspy.InputField()
    # Outputs: the verdict, and evidence as a mapping of str to list of str.
    faithfulness: bool = dspy.OutputField()
    evidence: dict[str, list[str]] = dspy.OutputField(desc="Supporting evidence for claims")

# Reference passage used as the trusted context for the check.
context = "The 21-year-old made seven appearances for the Hammers and netted his only goal for them in a Europa League qualification round match against Andorran side FC Lustrains last season. Lee had two loan spells in League One last term, with Blackpool and then Colchester United. He scored twice for the U's but was unable to save them from relegation. The length of Lee's contract with the promoted Tykes has not been revealed. Find all the latest football transfers on our dedicated page."

# Claim to verify: it says 3 goals, whereas the context says he "scored twice".
text = "Lee scored 3 goals for Colchester United."

faithfulness = dspy.ChainOfThought(CheckCitationFaithfulness)
# NOTE: the returned prediction is neither stored nor the cell's last
# expression, so it is only visible through the history dump below.
faithfulness(context=context, text=text)
dspy.inspect_history(n=80)

In [None]:
question = "What's something great about the ColBERT retrieval model?"

# 1) Declare with a signature, and pass some config.
#    n=5 asks for five completions per call.
classify = dspy.ChainOfThought('question -> answer', n=5)

# 2) Call with input argument.
response = classify(question=question)

# 3) Access the outputs.
#    A bare expression is only echoed when it is the last statement of a cell,
#    so print the answers explicitly to make them visible.
print(response.completions.answer)

# Print up to the last 80 recorded LM calls.
dspy.inspect_history(n=80)

In [31]:
import dspy
from dspy.datasets import HotPotQA

# logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
# logging.getLogger("dspy").setLevel(logging.DEBUG)

# root = logging.getLogger()
# root.setLevel(logging.DEBUG)
# handler = logging.StreamHandler(sys.stdout)
# handler.setLevel(logging.DEBUG)
# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# handler.setFormatter(formatter)
# root.addHandler(handler)

# Switch MLflow to a separate experiment for the optimisation run in this cell.
mlflow.set_experiment("optimized_react")

def search(query: str) -> list[str]:
    """Retrieves abstracts from Wikipedia."""
    # The docstring above is kept exactly as-is: it is the description the
    # ReAct agent is shown for this tool.
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    top_hits = retriever(query, k=3)
    return [hit['text'] for hit in top_hits]

# 50 HotPotQA training examples; only `question` is marked as an input field.
trainset = [
    example.with_inputs('question')
    for example in HotPotQA(train_seed=2024, train_size=50).train
]

# Baseline agent to optimise: a ReAct program with the single `search` tool.
react = dspy.ReAct("question -> answer", tools=[search])

# MIPROv2 in "light" auto mode, scored by exact match on the answer.
tp = dspy.MIPROv2(metric=dspy.evaluate.answer_exact_match, auto="light", num_threads=24)
optimized_react = tp.compile(react, trainset=trainset, requires_permission_to_run=False)

# Save location: DSPY_OUTPUT_DIR overrides the directory so the notebook is not
# tied to one machine; the default keeps the original path unchanged.
output_dir = os.getenv("DSPY_OUTPUT_DIR", "/home/noelo/dev/dspy-poc")
optimized_react.save(path=os.path.join(output_dir, "optimized_react.json"))

2025/06/02 09:47:23 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '58b2ce787aa841168c3645a190bc40fe', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current dspy workflow
2025/06/02 09:47:23 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 20
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 40

2025/06/02 09:47:23 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/02 09:47:23 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/06/02 09:47:23 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  8.69it/s]
100%|██████████| 10/10 [00:06<00:00,  1.61it/s]


Bootstrapped 4 full traces after 9 examples for up to 1 rounds, amounting to 10 attempts.


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  8.66it/s]


Bootstrapping set 4/6


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  9.28it/s]
 90%|█████████ | 9/10 [00:05<00:00,  1.73it/s]


Bootstrapped 4 full traces after 9 examples for up to 1 rounds, amounting to 9 attempts.


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  8.75it/s]


Bootstrapping set 5/6


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
 50%|█████     | 5/10 [00:03<00:03,  1.62it/s]


Bootstrapped 2 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  8.03it/s]


Bootstrapping set 6/6


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  9.32it/s]
 60%|██████    | 6/10 [00:03<00:02,  1.73it/s]


Bootstrapped 2 full traces after 6 examples for up to 1 rounds, amounting to 6 attempts.


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
2025/06/02 09:47:48 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/06/02 09:47:48 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2025/06/02 09:47:49 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/06/02 09:47:58 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/06/02 09:47:58 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given the fields `question`, produce the fields `answer`.

You are an Agent. In each episode, you will be given the fields `question` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `answer`.

To do this, you w

Average Metric: 8.00 / 40 (20.0%): 100%|██████████| 40/40 [00:07<00:00,  5.50it/s]

2025/06/02 09:48:06 INFO dspy.evaluate.evaluate: Average Metric: 8 / 40 (20.0%)



🏃 View run eval_full_0 at: http://mlflow:8080/#/experiments/34/runs/acac789193cf4da2a0bbe3801e6eab24
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:48:06 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 20.0

2025/06/02 09:48:06 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 20 =====


Average Metric: 4.00 / 40 (10.0%): 100%|██████████| 40/40 [00:08<00:00,  4.45it/s]

2025/06/02 09:48:16 INFO dspy.evaluate.evaluate: Average Metric: 4 / 40 (10.0%)



🏃 View run eval_full_1 at: http://mlflow:8080/#/experiments/34/runs/5e52dee93bfd4730934e95eae486140f
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:48:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 10.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/06/02 09:48:16 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0]
2025/06/02 09:48:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 20.0


2025/06/02 09:48:16 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 20 =====


Average Metric: 15.00 / 40 (37.5%): 100%|██████████| 40/40 [00:09<00:00,  4.36it/s]

2025/06/02 09:48:26 INFO dspy.evaluate.evaluate: Average Metric: 15 / 40 (37.5%)



🏃 View run eval_full_2 at: http://mlflow:8080/#/experiments/34/runs/14bc367257134a3cbcb33b152bee62bd
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 37.5
2025/06/02 09:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 37.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 2'].
2025/06/02 09:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5]
2025/06/02 09:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 20 =====


Average Metric: 4.00 / 40 (10.0%): 100%|██████████| 40/40 [00:08<00:00,  4.70it/s]

2025/06/02 09:48:36 INFO dspy.evaluate.evaluate: Average Metric: 4 / 40 (10.0%)



🏃 View run eval_full_3 at: http://mlflow:8080/#/experiments/34/runs/4a2b51711c074e378237ad45b407bd85
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:48:36 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 10.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/06/02 09:48:36 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0]
2025/06/02 09:48:36 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:48:36 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 20 =====


Average Metric: 14.00 / 40 (35.0%): 100%|██████████| 40/40 [00:09<00:00,  4.05it/s]

2025/06/02 09:48:47 INFO dspy.evaluate.evaluate: Average Metric: 14 / 40 (35.0%)



🏃 View run eval_full_4 at: http://mlflow:8080/#/experiments/34/runs/57568a2da1354b709104f44c113075c0
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:48:47 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 35.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/02 09:48:47 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0]
2025/06/02 09:48:47 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:48:47 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 20 =====


Average Metric: 15.00 / 40 (37.5%): 100%|██████████| 40/40 [00:08<00:00,  4.78it/s]

2025/06/02 09:48:56 INFO dspy.evaluate.evaluate: Average Metric: 15 / 40 (37.5%)



🏃 View run eval_full_5 at: http://mlflow:8080/#/experiments/34/runs/b8a493064a9744659103b269f59cfb6b
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:48:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 37.5 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 2'].
2025/06/02 09:48:57 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5]
2025/06/02 09:48:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:48:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 20 =====


Average Metric: 10.00 / 40 (25.0%): 100%|██████████| 40/40 [00:08<00:00,  4.75it/s]

2025/06/02 09:49:06 INFO dspy.evaluate.evaluate: Average Metric: 10 / 40 (25.0%)



🏃 View run eval_full_6 at: http://mlflow:8080/#/experiments/34/runs/9e2eff88c3364e498454f0e3823c91aa
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:49:06 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/02 09:49:06 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0]
2025/06/02 09:49:06 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:49:06 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 20 =====


Average Metric: 13.00 / 40 (32.5%): 100%|██████████| 40/40 [00:11<00:00,  3.62it/s]

2025/06/02 09:49:18 INFO dspy.evaluate.evaluate: Average Metric: 13 / 40 (32.5%)



🏃 View run eval_full_7 at: http://mlflow:8080/#/experiments/34/runs/996a4983320349c8974dab1a65aa228d
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:49:18 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 32.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 1'].
2025/06/02 09:49:18 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5]
2025/06/02 09:49:18 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:49:18 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 20 =====


Average Metric: 8.00 / 40 (20.0%): 100%|██████████| 40/40 [00:07<00:00,  5.53it/s]

2025/06/02 09:49:26 INFO dspy.evaluate.evaluate: Average Metric: 8 / 40 (20.0%)



🏃 View run eval_full_8 at: http://mlflow:8080/#/experiments/34/runs/927c4786234249629ce49fbbcd581ef8
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:49:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 20.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/02 09:49:27 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0]
2025/06/02 09:49:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:49:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 20 =====


Average Metric: 14.00 / 40 (35.0%): 100%|██████████| 40/40 [00:07<00:00,  5.06it/s]

2025/06/02 09:49:35 INFO dspy.evaluate.evaluate: Average Metric: 14 / 40 (35.0%)



🏃 View run eval_full_9 at: http://mlflow:8080/#/experiments/34/runs/abf540b1d04f48cbb0885273f117685a
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:49:36 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 35.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/02 09:49:36 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0]
2025/06/02 09:49:36 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 37.5


2025/06/02 09:49:36 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 20 =====


Average Metric: 17.00 / 40 (42.5%): 100%|██████████| 40/40 [00:09<00:00,  4.10it/s]

2025/06/02 09:49:46 INFO dspy.evaluate.evaluate: Average Metric: 17 / 40 (42.5%)



🏃 View run eval_full_10 at: http://mlflow:8080/#/experiments/34/runs/2718751c881747718c15dffff4cced6f
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 42.5
2025/06/02 09:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 2'].
2025/06/02 09:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5]
2025/06/02 09:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 12 / 20 =====


Average Metric: 17.00 / 40 (42.5%): 100%|██████████| 40/40 [00:08<00:00,  4.59it/s]

2025/06/02 09:49:56 INFO dspy.evaluate.evaluate: Average Metric: 17 / 40 (42.5%)



🏃 View run eval_full_11 at: http://mlflow:8080/#/experiments/34/runs/404c3bdbad0b4f83814576b9038929d7
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 3'].
2025/06/02 09:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5]
2025/06/02 09:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 20 =====


Average Metric: 17.00 / 40 (42.5%): 100%|██████████| 40/40 [00:09<00:00,  4.11it/s]

2025/06/02 09:50:07 INFO dspy.evaluate.evaluate: Average Metric: 17 / 40 (42.5%)



🏃 View run eval_full_12 at: http://mlflow:8080/#/experiments/34/runs/168629492c3743bda437451cfdbea958
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 3'].
2025/06/02 09:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5]
2025/06/02 09:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 14 / 20 =====


Average Metric: 17.00 / 40 (42.5%): 100%|██████████| 40/40 [00:08<00:00,  4.72it/s]

2025/06/02 09:50:17 INFO dspy.evaluate.evaluate: Average Metric: 17 / 40 (42.5%)



🏃 View run eval_full_13 at: http://mlflow:8080/#/experiments/34/runs/68757a88e9d6418d8dee0c5831475c28
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:50:17 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 2'].
2025/06/02 09:50:17 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5]
2025/06/02 09:50:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:50:17 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 15 / 20 =====


Average Metric: 16.00 / 40 (40.0%): 100%|██████████| 40/40 [00:08<00:00,  4.72it/s]

2025/06/02 09:50:26 INFO dspy.evaluate.evaluate: Average Metric: 16 / 40 (40.0%)



🏃 View run eval_full_14 at: http://mlflow:8080/#/experiments/34/runs/4dd2fcf5a4464304a359d5e82e77aaa5
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:50:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 40.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 1', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 5'].
2025/06/02 09:50:27 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0]
2025/06/02 09:50:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:50:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 16 / 20 =====


Average Metric: 2.00 / 40 (5.0%): 100%|██████████| 40/40 [00:08<00:00,  4.59it/s] 

2025/06/02 09:50:36 INFO dspy.evaluate.evaluate: Average Metric: 2 / 40 (5.0%)



🏃 View run eval_full_15 at: http://mlflow:8080/#/experiments/34/runs/f6d97cdf9b5b402abc733c7c17adc152
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:50:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 5.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 0'].
2025/06/02 09:50:37 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0, 5.0]
2025/06/02 09:50:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:50:37 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 17 / 20 =====


Average Metric: 16.00 / 40 (40.0%): 100%|██████████| 40/40 [00:09<00:00,  4.07it/s]

2025/06/02 09:50:47 INFO dspy.evaluate.evaluate: Average Metric: 16 / 40 (40.0%)



🏃 View run eval_full_16 at: http://mlflow:8080/#/experiments/34/runs/a967adf1a3ab487fa5bf93d229af0bb3
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:50:48 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 40.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 5'].
2025/06/02 09:50:48 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0, 5.0, 40.0]
2025/06/02 09:50:48 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:50:48 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 18 / 20 =====


Average Metric: 17.00 / 40 (42.5%): 100%|██████████| 40/40 [00:10<00:00,  3.89it/s]

2025/06/02 09:50:59 INFO dspy.evaluate.evaluate: Average Metric: 17 / 40 (42.5%)



🏃 View run eval_full_17 at: http://mlflow:8080/#/experiments/34/runs/2c960556daed466cad0758978a846050
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:50:59 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 4'].
2025/06/02 09:50:59 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0, 5.0, 40.0, 42.5]
2025/06/02 09:50:59 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:50:59 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 20 =====


Average Metric: 16.00 / 40 (40.0%): 100%|██████████| 40/40 [00:09<00:00,  4.18it/s]

2025/06/02 09:51:09 INFO dspy.evaluate.evaluate: Average Metric: 16 / 40 (40.0%)



🏃 View run eval_full_18 at: http://mlflow:8080/#/experiments/34/runs/f5cc33761fe14c91ac190908e8e4ff38
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:51:10 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 40.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 1'].
2025/06/02 09:51:10 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0, 5.0, 40.0, 42.5, 40.0]
2025/06/02 09:51:10 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:51:10 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 20 / 20 =====


Average Metric: 1.00 / 40 (2.5%): 100%|██████████| 40/40 [00:08<00:00,  4.45it/s]

2025/06/02 09:51:20 INFO dspy.evaluate.evaluate: Average Metric: 1 / 40 (2.5%)



🏃 View run eval_full_19 at: http://mlflow:8080/#/experiments/34/runs/acba27b579bc4cd48c69e4e40cf2459b
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:51:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 2.5 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 0'].
2025/06/02 09:51:20 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0, 5.0, 40.0, 42.5, 40.0, 2.5]
2025/06/02 09:51:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:51:20 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 / 20 =====


Average Metric: 15.00 / 40 (37.5%): 100%|██████████| 40/40 [00:10<00:00,  3.97it/s]

2025/06/02 09:51:31 INFO dspy.evaluate.evaluate: Average Metric: 15 / 40 (37.5%)



🏃 View run eval_full_20 at: http://mlflow:8080/#/experiments/34/runs/2c9acc44e7d348a7a7f830e55331ea65
🧪 View experiment at: http://mlflow:8080/#/experiments/34


2025/06/02 09:51:32 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 37.5 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 3'].
2025/06/02 09:51:32 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [20.0, 10.0, 37.5, 10.0, 35.0, 37.5, 25.0, 32.5, 20.0, 35.0, 42.5, 42.5, 42.5, 42.5, 40.0, 5.0, 40.0, 42.5, 40.0, 2.5, 37.5]
2025/06/02 09:51:32 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 42.5


2025/06/02 09:51:32 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 42.5!
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  6.09it/s]


🏃 View run funny-donkey-48 at: http://mlflow:8080/#/experiments/34/runs/58b2ce787aa841168c3645a190bc40fe
🧪 View experiment at: http://mlflow:8080/#/experiments/34


In [32]:
# Show the optimised program: its predictors and the instructions chosen for them.
print(optimized_react)

react = Predict(StringSignature(question, trajectory -> next_thought, next_tool_name, next_tool_args
    instructions="Given the fields `question`, produce the fields `answer`.\n\nYou are an Agent. In each episode, you will be given the fields `question` as input. You will also have access to your past trajectory, allowing you to build upon previous actions and observations.\n\nYour goal is to use one or more of the supplied tools to gather the necessary information for producing an accurate `answer`.\n\nTo achieve this, you will interleave `next_thought`, `next_tool_name`, and `next_tool_args` in each turn, and also when concluding the task. After each tool call, you will receive an observation, which will be added to your trajectory.\n\nWhen crafting `next_thought`, you may analyze the current situation and devise a plan for future actions. When selecting the `next_tool_name` and its `next_tool_args`, ensure the tool is one of:\n\n1. `search`, which retrieves abstracts from Wikipedia

In [33]:
# Show the agent that was passed to the optimiser, for comparison with the optimised one.
print(react)

react = Predict(StringSignature(question, trajectory -> next_thought, next_tool_name, next_tool_args
    instructions="Given the fields `question`, produce the fields `answer`.\n\nYou are an Agent. In each episode, you will be given the fields `question` as input. And you can see your past trajectory so far.\nYour goal is to use one or more of the supplied tools to collect any necessary information for producing `answer`.\n\nTo do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.\nAfter each tool call, you receive a resulting observation, which gets appended to your trajectory.\n\nWhen writing next_thought, you may reason about the current situation and plan for future steps.\nWhen selecting the next_tool_name and its next_tool_args, the tool must be one of:\n\n(1) search, whose description is <desc>Retrieves abstracts from Wikipedia.</desc>. It takes arguments {'query': {'type': 'string'}} in JSON format.\n(2) fi