In [3]:
import dspy

gpt41mini = dspy.LM('azure/gpt-4.1-mini', temperature=0.7)
gpt41 = dspy.LM('azure/gpt-4.1', temperature=0.7)

dspy.configure(lm=gpt41mini)

In [4]:
import random
from dspy.datasets import DataLoader

kwargs = dict(fields=("claim", "supporting_facts", "hpqa_id", "num_hops"), input_keys=("claim",))
hover = DataLoader().from_huggingface(dataset_name="hover-nlp/hover", split="train", trust_remote_code=True, **kwargs)

hpqa_ids = set()
hover = [
    dspy.Example(claim=x.claim, titles=list(set([y["key"] for y in x.supporting_facts]))).with_inputs("claim")
    for x in hover
    if x["num_hops"] == 3 and x["hpqa_id"] not in hpqa_ids and not hpqa_ids.add(x["hpqa_id"])
]

random.Random(0).shuffle(hover)
trainset, devset, testset = hover[:100], hover[100:200], hover[650:]

Downloading data: 100%|██████████| 9.21M/9.21M [00:04<00:00, 2.20MB/s]
Downloading data: 100%|██████████| 2.15M/2.15M [00:01<00:00, 1.83MB/s]
Downloading data: 100%|██████████| 899k/899k [00:00<00:00, 2.82MB/s]
Generating train split: 100%|██████████| 18171/18171 [00:00<00:00, 28656.99 examples/s]
Generating validation split: 100%|██████████| 4000/4000 [00:00<00:00, 36888.41 examples/s]
Generating test split: 100%|██████████| 4000/4000 [00:00<00:00, 51961.63 examples/s]


In [5]:
example = trainset[0]

print("Claim:", example.claim)
print("Pages that must be retrieved:", example.titles)

Claim: This director is known for his work on Miss Potter. The Academy of Motion Picture Arts and Sciences presents the award in which he was nominated for his work in "Babe".
Pages that must be retrieved: ['Chris Noonan', 'Miss Potter', 'Academy Award for Best Director']


In [6]:
DOCS = {}

def search(query: str, k: int) -> list[str]:
    results = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')(query, k=k)
    results = [x['text'] for x in results]

    for result in results:
        title, text = result.split(" | ", 1)
        DOCS[title] = text

    return results

In [7]:
def search_wikipedia(query: str) -> list[str]:
    """Returns top-5 results and then the titles of the top-5 to top-30 results."""

    topK = search(query, 30)
    titles, topK = [f"`{x.split(' | ')[0]}`" for x in topK[5:30]], topK[:5]
    return topK + [f"Other retrieved pages have titles: {', '.join(titles)}."]

def lookup_wikipedia(title: str) -> str:
    """Returns the text of the Wikipedia page, if it exists."""

    if title in DOCS:
        return DOCS[title]

    results = [x for x in search(title, 10) if x.startswith(title + " | ")]
    if not results:
        return f"No Wikipedia page found for title: {title}"
    return results[0]

In [8]:
instructions = "Find all Wikipedia titles relevant to verifying (or refuting) the claim."
signature = dspy.Signature("claim -> titles: list[str]", instructions)
react = dspy.ReAct(signature, tools=[search_wikipedia, lookup_wikipedia], max_iters=20)

In [9]:
react(claim="David Gregory was born in 1625.").titles[:3]

['David Gregory (physician)']

In [10]:
def top5_recall(example, pred, trace=None):
    gold_titles = example.titles
    recall = sum(x in pred.titles[:5] for x in gold_titles) / len(gold_titles)

    # If we're "bootstrapping" for optimization, return True if and only if the recall is perfect.
    if trace is not None:
        return recall >= 1.0
    
    # If we're just doing inference, just measure the recall.
    return recall

evaluate = dspy.Evaluate(devset=devset, metric=top5_recall, num_threads=16, display_progress=True, display_table=5)

In [11]:
def safe_react(claim: str):
    try:
        return react(claim=claim)
    except Exception as e:
        return dspy.Prediction(titles=[])

evaluate(safe_react)

Average Metric: 86.33 / 100 (86.3%): 100%|██████████| 100/100 [02:42<00:00,  1.63s/it]

2025/05/01 14:04:39 INFO dspy.evaluate.evaluate: Average Metric: 86.33333333333333 / 100 (86.3%)





Unnamed: 0,claim,example_titles,trajectory,reasoning,pred_titles,top5_recall
0,The Church of England's movement that inspired the Trinity Episcop...,"[Trinity Episcopal Church (Houghton, Michigan), Oxford Movement, S...","{'thought_0': ""To verify this claim, I need to identify the Church...",The claim states that the Church of England's movement inspiring t...,"[Trinity Episcopal Church (Houghton, Michigan), Oxford Movement, S...",✔️ [1.000]
1,"Red, White & Crüe and this athlete both fight. The french fighter ...","[Mike Tyson, Bobby Stewart, Red, White &amp; Crüe]","{'thought_0': 'The claim references ""Red, White & Crüe"" and a Fren...","The claim connects ""Red, White & Crüe,"" which is identified as an ...","[Red, White & Crüe, Bobby Stewart, Mike Tyson, Christophe Mendy, F...",✔️ [0.667]
2,The writer/director/actor from Glen or Glenda and Fernand Rivers s...,"[Ed Wood, Glen or Glenda, Fernand Rivers]","{'thought_0': 'To verify the claim, I need to first identify the w...","The claim states that the writer/director/actor from ""Glen or Glen...","[Glen or Glenda, Ed Wood, Fernand Rivers]",✔️ [1.000]
3,The film by Sandi Sissel was released before The End of Suburbia.,"[The End of Suburbia, Sandi Sissel, Chicken Ranch (film)]","{'thought_0': 'To verify the claim, I need to find the film direct...","The claim states that a film by Sandi Sissel was released before ""...","[Sandi Sissel, The End of Suburbia, Chicken Ranch (film)]",✔️ [1.000]
4,The actor who played captain hook in the live production with Tayl...,"[Christopher Walken, Peter Pan Live!, Taylor Louderman]","{'thought_0': 'To verify this claim, I need to identify the actor ...",The claim is that the actor who played Captain Hook in the live pr...,"[Taylor Louderman, Peter Pan Live!, Christopher Walken, The Deer H...",✔️ [1.000]


86.33

In [12]:
kwargs = dict(teacher_settings=dict(lm=gpt41), prompt_model=gpt41, max_errors=999)

tp = dspy.MIPROv2(metric=top5_recall, auto="medium", num_threads=16, **kwargs)
optimized_react = tp.compile(react, trainset=trainset, max_bootstrapped_demos=3, max_labeled_demos=0)

2025/05/01 14:06:40 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:
num_trials: 25
minibatch: True
num_candidates: 9
valset size: 80

2025/05/01 14:06:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/05/01 14:06:56 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/05/01 14:06:56 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=9 sets of demonstrations...


Bootstrapping set 1/9
Bootstrapping set 2/9


 40%|████      | 8/20 [01:33<02:19, 11.64s/it]


Bootstrapped 3 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.
Bootstrapping set 3/9


 15%|█▌        | 3/20 [00:27<02:36,  9.20s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/9


 10%|█         | 2/20 [00:09<01:24,  4.68s/it]


Bootstrapped 1 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/9


 15%|█▌        | 3/20 [00:00<00:00, 50.54it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/9


 25%|██▌       | 5/20 [00:48<02:26,  9.78s/it]


Bootstrapped 3 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Bootstrapping set 7/9


 15%|█▌        | 3/20 [01:11<06:42, 23.68s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 8/9


 10%|█         | 2/20 [00:00<00:00, 59.68it/s]


Bootstrapped 1 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 9/9


 25%|██▌       | 5/20 [00:56<02:50, 11.36s/it]
2025/05/01 14:12:03 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/05/01 14:12:03 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 3 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.


2025/05/01 14:12:21 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2025/05/01 14:16:57 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/05/01 14:16:57 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Find all Wikipedia titles relevant to verifying (or refuting) the claim.

You are an Agent. In each episode, you will be given the fields `claim` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `titles`.

To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.
After each tool call, you receive a resulting observation, which gets appended to your trajectory.

When writing next_thought, you may reason about the current situation and plan for future steps.
When selecting the next_tool_name and its next_tool_args, the tool must be one of:

(1) search_wikipedi

Average Metric: 68.00 / 80 (85.0%): 100%|██████████| 80/80 [02:22<00:00,  1.78s/it]

2025/05/01 14:19:19 INFO dspy.evaluate.evaluate: Average Metric: 68.0 / 80 (85.0%)
2025/05/01 14:19:19 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 85.0

2025/05/01 14:19:19 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 31 - Minibatch ==



Average Metric: 29.33 / 35 (83.8%): 100%|██████████| 35/35 [01:17<00:00,  2.22s/it]

2025/05/01 14:20:37 INFO dspy.evaluate.evaluate: Average Metric: 29.333333333333332 / 35 (83.8%)
2025/05/01 14:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 83.81 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 5'].
2025/05/01 14:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81]
2025/05/01 14:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0]
2025/05/01 14:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 31 - Minibatch ==



Average Metric: 29.00 / 35 (82.9%): 100%|██████████| 35/35 [00:42<00:00,  1.20s/it]

2025/05/01 14:21:19 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 35 (82.9%)
2025/05/01 14:21:19 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 1'].
2025/05/01 14:21:19 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86]
2025/05/01 14:21:19 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0]
2025/05/01 14:21:19 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:21:19 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 31 - Minibatch ==



Average Metric: 30.00 / 35 (85.7%): 100%|██████████| 35/35 [02:01<00:00,  3.48s/it]

2025/05/01 14:23:21 INFO dspy.evaluate.evaluate: Average Metric: 30.0 / 35 (85.7%)
2025/05/01 14:23:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 3'].
2025/05/01 14:23:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71]
2025/05/01 14:23:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0]
2025/05/01 14:23:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:23:21 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 31 - Minibatch ==



Average Metric: 29.33 / 35 (83.8%): 100%|██████████| 35/35 [01:55<00:00,  3.31s/it]

2025/05/01 14:25:16 INFO dspy.evaluate.evaluate: Average Metric: 29.333333333333332 / 35 (83.8%)
2025/05/01 14:25:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 83.81 on minibatch of size 35 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 3', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:25:16 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81]
2025/05/01 14:25:16 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0]
2025/05/01 14:25:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:25:16 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 31 - Minibatch ==



Average Metric: 29.00 / 35 (82.9%): 100%|██████████| 35/35 [01:36<00:00,  2.74s/it]

2025/05/01 14:26:52 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 35 (82.9%)
2025/05/01 14:26:52 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 7', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 6'].
2025/05/01 14:26:52 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86]
2025/05/01 14:26:52 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0]
2025/05/01 14:26:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:26:52 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 31 - Full Evaluation =====
2025/05/01 14:26:52 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 85.71) from minibatch trials...



Average Metric: 64.33 / 80 (80.4%): 100%|██████████| 80/80 [02:14<00:00,  1.68s/it] 

2025/05/01 14:29:07 INFO dspy.evaluate.evaluate: Average Metric: 64.33333333333333 / 80 (80.4%)
2025/05/01 14:29:07 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42]
2025/05/01 14:29:07 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0
2025/05/01 14:29:07 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/05/01 14:29:07 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 31 - Minibatch ==



Average Metric: 29.00 / 35 (82.9%): 100%|██████████| 35/35 [01:33<00:00,  2.67s/it]

2025/05/01 14:30:41 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 35 (82.9%)
2025/05/01 14:30:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 7', 'Predictor 1: Few-Shot Set 3'].
2025/05/01 14:30:41 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86]
2025/05/01 14:30:41 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42]
2025/05/01 14:30:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:30:41 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 31 - Minibatch ==



Average Metric: 29.67 / 35 (84.8%): 100%|██████████| 35/35 [01:19<00:00,  2.28s/it]

2025/05/01 14:32:00 INFO dspy.evaluate.evaluate: Average Metric: 29.666666666666668 / 35 (84.8%)
2025/05/01 14:32:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.76 on minibatch of size 35 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 7', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 3'].
2025/05/01 14:32:00 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76]
2025/05/01 14:32:00 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42]
2025/05/01 14:32:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:32:01 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 31 - Minibatch ==



Average Metric: 29.33 / 35 (83.8%): 100%|██████████| 35/35 [01:11<00:00,  2.03s/it]

2025/05/01 14:33:12 INFO dspy.evaluate.evaluate: Average Metric: 29.333333333333332 / 35 (83.8%)
2025/05/01 14:33:12 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 83.81 on minibatch of size 35 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 7', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:33:12 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81]
2025/05/01 14:33:12 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42]
2025/05/01 14:33:12 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:33:12 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 11 / 31 - Minibatch ==



Average Metric: 28.67 / 35 (81.9%): 100%|██████████| 35/35 [01:53<00:00,  3.23s/it]

2025/05/01 14:35:05 INFO dspy.evaluate.evaluate: Average Metric: 28.666666666666668 / 35 (81.9%)
2025/05/01 14:35:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 81.9 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 3'].
2025/05/01 14:35:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9]
2025/05/01 14:35:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42]
2025/05/01 14:35:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:35:05 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 31 - Minibatch ==



Average Metric: 31.33 / 35 (89.5%): 100%|██████████| 35/35 [01:35<00:00,  2.72s/it]

2025/05/01 14:36:40 INFO dspy.evaluate.evaluate: Average Metric: 31.333333333333332 / 35 (89.5%)
2025/05/01 14:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 89.52 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52]
2025/05/01 14:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42]
2025/05/01 14:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 85.0


2025/05/01 14:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 31 - Full Evaluation =====
2025/05/01 14:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 89.52) from minibatch trials...



Average Metric: 69.33 / 80 (86.7%): 100%|██████████| 80/80 [01:31<00:00,  1.14s/it]

2025/05/01 14:38:12 INFO dspy.evaluate.evaluate: Average Metric: 69.33333333333333 / 80 (86.7%)
2025/05/01 14:38:12 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 86.67
2025/05/01 14:38:12 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67]
2025/05/01 14:38:12 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67
2025/05/01 14:38:12 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/05/01 14:38:12 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 14 / 31 - Minibatch ==



Average Metric: 28.67 / 35 (81.9%): 100%|██████████| 35/35 [01:20<00:00,  2.31s/it]

2025/05/01 14:39:33 INFO dspy.evaluate.evaluate: Average Metric: 28.666666666666668 / 35 (81.9%)
2025/05/01 14:39:33 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 81.9 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 2'].
2025/05/01 14:39:33 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9]
2025/05/01 14:39:33 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67]
2025/05/01 14:39:33 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:39:33 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 15 / 31 - Minibatch ==



Average Metric: 26.00 / 35 (74.3%): 100%|██████████| 35/35 [01:21<00:00,  2.34s/it]

2025/05/01 14:40:55 INFO dspy.evaluate.evaluate: Average Metric: 26.0 / 35 (74.3%)
2025/05/01 14:40:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 74.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 1', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:40:55 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29]
2025/05/01 14:40:55 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67]
2025/05/01 14:40:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:40:55 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 16 / 31 - Minibatch ==



Average Metric: 29.00 / 35 (82.9%): 100%|██████████| 35/35 [01:20<00:00,  2.30s/it]

2025/05/01 14:42:16 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 35 (82.9%)
2025/05/01 14:42:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 6', 'Predictor 1: Instruction 4', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:42:16 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86]
2025/05/01 14:42:16 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67]
2025/05/01 14:42:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:42:16 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 17 / 31 - Minibatch ==



Average Metric: 27.67 / 35 (79.0%): 100%|██████████| 35/35 [01:56<00:00,  3.32s/it]

2025/05/01 14:44:12 INFO dspy.evaluate.evaluate: Average Metric: 27.666666666666668 / 35 (79.0%)
2025/05/01 14:44:12 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 79.05 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 8'].
2025/05/01 14:44:12 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05]
2025/05/01 14:44:12 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67]
2025/05/01 14:44:12 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:44:12 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 18 / 31 - Minibatch ==



Average Metric: 30.00 / 35 (85.7%): 100%|██████████| 35/35 [00:07<00:00,  4.89it/s]

2025/05/01 14:44:20 INFO dspy.evaluate.evaluate: Average Metric: 30.0 / 35 (85.7%)
2025/05/01 14:44:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 7'].
2025/05/01 14:44:20 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71]
2025/05/01 14:44:20 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67]
2025/05/01 14:44:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:44:20 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 31 - Full Evaluation =====
2025/05/01 14:44:20 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 85.71) from minibatch trials...



Average Metric: 69.00 / 80 (86.2%): 100%|██████████| 80/80 [00:10<00:00,  7.59it/s]

2025/05/01 14:44:31 INFO dspy.evaluate.evaluate: Average Metric: 69.0 / 80 (86.2%)
2025/05/01 14:44:31 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25]
2025/05/01 14:44:31 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67
2025/05/01 14:44:31 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/05/01 14:44:31 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 20 / 31 - Minibatch ==



Average Metric: 28.00 / 35 (80.0%): 100%|██████████| 35/35 [01:59<00:00,  3.40s/it]

2025/05/01 14:46:31 INFO dspy.evaluate.evaluate: Average Metric: 28.0 / 35 (80.0%)
2025/05/01 14:46:31 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 5', 'Predictor 1: Few-Shot Set 8'].
2025/05/01 14:46:31 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0]
2025/05/01 14:46:31 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25]
2025/05/01 14:46:31 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:46:31 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 21 / 31 - Minibatch ==



Average Metric: 29.67 / 35 (84.8%): 100%|██████████| 35/35 [01:29<00:00,  2.56s/it]

2025/05/01 14:48:00 INFO dspy.evaluate.evaluate: Average Metric: 29.666666666666668 / 35 (84.8%)
2025/05/01 14:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.76 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 6', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 5'].
2025/05/01 14:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76]
2025/05/01 14:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25]
2025/05/01 14:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 22 / 31 - Minibatch ==



Average Metric: 28.33 / 35 (81.0%): 100%|██████████| 35/35 [01:56<00:00,  3.34s/it]

2025/05/01 14:49:57 INFO dspy.evaluate.evaluate: Average Metric: 28.333333333333332 / 35 (81.0%)
2025/05/01 14:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.95 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 6', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95]
2025/05/01 14:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25]
2025/05/01 14:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:49:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 23 / 31 - Minibatch ==



Average Metric: 30.33 / 35 (86.7%): 100%|██████████| 35/35 [00:08<00:00,  4.06it/s]

2025/05/01 14:50:07 INFO dspy.evaluate.evaluate: Average Metric: 30.333333333333332 / 35 (86.7%)
2025/05/01 14:50:07 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 86.67 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:50:07 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67]
2025/05/01 14:50:07 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25]
2025/05/01 14:50:07 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:50:07 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 24 / 31 - Minibatch ==



Average Metric: 31.00 / 35 (88.6%): 100%|██████████| 35/35 [00:05<00:00,  6.34it/s]

2025/05/01 14:50:13 INFO dspy.evaluate.evaluate: Average Metric: 31.0 / 35 (88.6%)
2025/05/01 14:50:13 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:50:13 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67, 88.57]
2025/05/01 14:50:13 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25]
2025/05/01 14:50:13 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:50:13 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 25 / 31 - Full Evaluation =====
2025/05/01 14:50:13 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 87.62) from minibatch tr


Average Metric: 68.33 / 80 (85.4%): 100%|██████████| 80/80 [00:08<00:00,  9.69it/s]

2025/05/01 14:50:22 INFO dspy.evaluate.evaluate: Average Metric: 68.33333333333333 / 80 (85.4%)
2025/05/01 14:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42]
2025/05/01 14:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67
2025/05/01 14:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/05/01 14:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 26 / 31 - Minibatch ==



Average Metric: 28.67 / 35 (81.9%): 100%|██████████| 35/35 [02:19<00:00,  3.99s/it]

2025/05/01 14:52:42 INFO dspy.evaluate.evaluate: Average Metric: 28.666666666666664 / 35 (81.9%)
2025/05/01 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 81.9 on minibatch of size 35 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 5', 'Predictor 1: Few-Shot Set 4'].
2025/05/01 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67, 88.57, 81.9]
2025/05/01 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42]
2025/05/01 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 27 / 31 - Minibatch ==



Average Metric: 29.00 / 35 (82.9%): 100%|██████████| 35/35 [01:39<00:00,  2.83s/it]

2025/05/01 14:54:21 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 35 (82.9%)
2025/05/01 14:54:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/05/01 14:54:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67, 88.57, 81.9, 82.86]
2025/05/01 14:54:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42]
2025/05/01 14:54:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:54:21 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 28 / 31 - Minibatch ==



Average Metric: 30.33 / 35 (86.7%): 100%|██████████| 35/35 [01:08<00:00,  1.95s/it]

2025/05/01 14:55:29 INFO dspy.evaluate.evaluate: Average Metric: 30.333333333333332 / 35 (86.7%)
2025/05/01 14:55:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 86.67 on minibatch of size 35 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 5'].
2025/05/01 14:55:29 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67, 88.57, 81.9, 82.86, 86.67]
2025/05/01 14:55:29 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42]
2025/05/01 14:55:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:55:29 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 29 / 31 - Minibatch ==



Average Metric: 25.33 / 35 (72.4%): 100%|██████████| 35/35 [01:58<00:00,  3.39s/it]

2025/05/01 14:57:28 INFO dspy.evaluate.evaluate: Average Metric: 25.333333333333332 / 35 (72.4%)
2025/05/01 14:57:28 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 72.38 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 4'].
2025/05/01 14:57:28 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67, 88.57, 81.9, 82.86, 86.67, 72.38]
2025/05/01 14:57:28 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42]
2025/05/01 14:57:28 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:57:28 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 30 / 31 - Minibatch ==



Average Metric: 28.67 / 35 (81.9%): 100%|██████████| 35/35 [01:46<00:00,  3.06s/it]

2025/05/01 14:59:15 INFO dspy.evaluate.evaluate: Average Metric: 28.666666666666668 / 35 (81.9%)
2025/05/01 14:59:15 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 81.9 on minibatch of size 35 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 4'].
2025/05/01 14:59:15 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [83.81, 82.86, 85.71, 83.81, 82.86, 82.86, 84.76, 83.81, 81.9, 89.52, 81.9, 74.29, 82.86, 79.05, 85.71, 80.0, 84.76, 80.95, 86.67, 88.57, 81.9, 82.86, 86.67, 72.38, 81.9]
2025/05/01 14:59:15 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42]
2025/05/01 14:59:15 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67


2025/05/01 14:59:15 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 31 / 31 - Full Evaluation =====
2025/05/01 14:59:15 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top a


Average Metric: 66.33 / 80 (82.9%): 100%|██████████| 80/80 [01:46<00:00,  1.33s/it]

2025/05/01 15:01:02 INFO dspy.evaluate.evaluate: Average Metric: 66.33333333333333 / 80 (82.9%)
2025/05/01 15:01:02 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [85.0, 80.42, 86.67, 86.25, 85.42, 82.92]
2025/05/01 15:01:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 86.67
2025/05/01 15:01:02 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/05/01 15:01:02 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 86.67!





In [13]:
evaluate(optimized_react)

Average Metric: 87.67 / 100 (87.7%): 100%|██████████| 100/100 [04:15<00:00,  2.55s/it]

2025/05/01 15:07:07 INFO dspy.evaluate.evaluate: Average Metric: 87.66666666666667 / 100 (87.7%)





Unnamed: 0,claim,example_titles,trajectory,reasoning,pred_titles,top5_recall
0,The Church of England's movement that inspired the Trinity Episcop...,"[Trinity Episcopal Church (Houghton, Michigan), Oxford Movement, S...",{'thought_0': 'The claim involves several specific elements: a Chu...,The claim is that the Church of England's movement inspiring the T...,"[Trinity Episcopal Church (Houghton, Michigan), Oxford Movement, S...",✔️ [1.000]
1,"Red, White & Crüe and this athlete both fight. The french fighter ...","[Mike Tyson, Bobby Stewart, Red, White &amp; Crüe]","{'thought_0': 'The claim mentions ""Red, White & Crüe"" and a French...","The claim asserts that ""Red, White & Crüe and this athlete both fi...","[Red, White & Crüe, Bobby Stewart, Jean Baptiste Mendy, Mathieu Ba...",✔️ [0.333]
2,The writer/director/actor from Glen or Glenda and Fernand Rivers s...,"[Ed Wood, Glen or Glenda, Fernand Rivers]","{'thought_0': 'To verify the claim, I need to identify the writer/...","The claim is that the writer/director/actor from ""Glen or Glenda"" ...","[Ed Wood, Fernand Rivers, Glen or Glenda]",✔️ [1.000]
3,The film by Sandi Sissel was released before The End of Suburbia.,"[The End of Suburbia, Sandi Sissel, Chicken Ranch (film)]","{'thought_0': 'To verify the claim, I need to find out the release...",The claim states that the film by Sandi Sissel was released before...,"[Chicken Ranch (film), The End of Suburbia]",✔️ [0.667]
4,The actor who played captain hook in the live production with Tayl...,"[Christopher Walken, Peter Pan Live!, Taylor Louderman]","{'thought_0': 'To verify the claim, I need to identify the actor w...",The claim states that the actor who played Captain Hook in the liv...,"[Taylor Louderman, Peter Pan Live!, Christopher Walken, The Deer H...",✔️ [1.000]


87.67

In [14]:
optimized_react(claim="The author of the 1960s unproduced script written for The Beatles, Up Against It, and Bernard-Marie Koltès are both playwrights.").titles

['Up Against It', 'Joe Orton', 'Bernard-Marie Koltès']

In [15]:
dspy.inspect_history(n=2)





[34m[2025-05-01T15:07:35.855733][0m

[31mSystem message:[0m

Your input fields are:
1. `claim` (str)
2. `trajectory` (str)
Your output fields are:
1. `next_thought` (str)
2. `next_tool_name` (Literal['search_wikipedia', 'lookup_wikipedia', 'finish'])
3. `next_tool_args` (dict[str, Any])
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## claim ## ]]
{claim}

[[ ## trajectory ## ]]
{trajectory}

[[ ## next_thought ## ]]
{next_thought}

[[ ## next_tool_name ## ]]
{next_tool_name}        # note: the value you produce must exactly match (no extra characters) one of: search_wikipedia; lookup_wikipedia; finish

[[ ## next_tool_args ## ]]
{next_tool_args}        # note: the value you produce must adhere to the JSON schema: {"type": "object", "additionalProperties": true}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Given a claim and the current trajectory (which includes your previous thoughts, to

In [None]:
optimized_react.save("optimized_react.json")

loaded_react = dspy.ReAct("claim -> titles: list[str]", tools=[search_wikipedia, lookup_wikipedia], max_iters=20)
loaded_react.load("optimized_react.json")

loaded_react(claim="The author of the 1960s unproduced script written for The Beatles, Up Against It, and Bernard-Marie Koltès are both playwrights.").titles