# DSPy Tutorials

These tutorials, taken from dspy.ai, demonstrate how to build DSPy programs that perform a variety of tasks.

In [3]:
import os
import dspy

from dotenv import load_dotenv
# Load the variables stored in the local key file before the API key is read below.
load_dotenv("grok_key.ini")

# Notebook-wide language model; a missing XAI_API_KEY raises KeyError on this lookup.
lm = dspy.LM(
    'xai/grok-3-mini',
    api_key=os.environ['XAI_API_KEY'],
)
# Local alternative, left disabled:
# lm = dspy.LM('ollama_chat/devstral', api_base='http://localhost:11434', api_key='')
dspy.configure(lm=lm)

In [4]:
# Example 1: Simple Question Answering
# The signature string asks for a float-typed `answer` to a `question`.
# NOTE(review): `math` shadows the stdlib module name — consider renaming if
# `import math` is ever needed in this notebook.
math = dspy.ChainOfThought("question -> answer: float")
math(
    question="Two dice are tossed. What is the probability that the sum equals two?",
)


Prediction(
    reasoning='To determine the probability that the sum of two tossed dice equals two, first identify the total possible outcomes. Each die has 6 faces, so for two dice, there are 6 * 6 = 36 possible outcomes. For the sum to be exactly 2, both dice must show a 1 (since 1 + 1 = 2). There is only 1 favorable outcome. Therefore, the probability is the number of favorable outcomes divided by the total outcomes: 1 / 36, which equals approximately 0.027777777777777776.',
    answer=0.027777777777777776
)

In [5]:
# Example 2: RAG with Retrieval
def search_wikipedia(query: str) -> list[str]:
    """Return the text of the top three passages retrieved for ``query``.

    Args:
        query: Search string sent to the ColBERTv2 retrieval endpoint.

    Returns:
        The ``'text'`` field of each of the ``k=3`` results, in the order
        the retriever returned them.
    """
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    passages = retriever(query, k=3)
    return [passage['text'] for passage in passages]

# Single-step RAG: the passages retrieved for the question are passed in as `context`.
question = "What's the name of the castle that David Gregory inherited?"
rag = dspy.ChainOfThought('context, question -> response')

rag(
    context=search_wikipedia(question),
    question=question,
)

Prediction(
    reasoning='The question asks for the name of the castle inherited by David Gregory. Reviewing the provided context, the first paragraph specifically mentions David Gregory and states that he inherited Kinnairdy Castle in 1664. The other paragraphs discuss different individuals (Gregory Tarchaneiotes and Gregory of Gaeta), which are unrelated to this query. Therefore, the relevant information is directly from the first paragraph.',
    response='Kinnairdy Castle'
)

In [6]:
# Example 3: Classification
from typing import Literal

# Signature describing a sentence -> (sentiment, confidence) classification task.
# NOTE(review): the class docstring is presumably sent to the LM as the task
# instruction — treat any edit to it as a prompt change.
class Classify(dspy.Signature):
    """Classify sentiment of a given sentence."""

    # Input: the sentence to classify.
    sentence: str = dspy.InputField()
    # Output: restricted by its type to one of the three literal labels.
    sentiment: Literal['positive', 'negative', 'neutral'] = dspy.OutputField()
    # Output: a float; this signature does not state a range or scale for it.
    confidence: float = dspy.OutputField()

# Plain predictor over the Classify signature, applied to one mixed-sentiment sentence.
classify = dspy.Predict(Classify)
classify(
    sentence="This book was super fun to read, though not the last chapter.",
)

Prediction(
    sentiment='positive',
    confidence=0.85
)

In [7]:
# Example 4: Information Extraction
# Signature describing a text -> (title, headings, entities) extraction task.
# NOTE(review): the class docstring and the `desc` string are presumably part of
# the prompt sent to the LM — treat edits to them as prompt changes.
class ExtractInfo(dspy.Signature):
    """Extract structured information from text."""

    # Input: the raw text to extract from.
    text: str = dspy.InputField()
    # Output: a single title string.
    title: str = dspy.OutputField()
    # Output: a list of heading strings.
    headings: list[str] = dspy.OutputField()
    # Output: one string-to-string dict per entity; the keys are not fixed by this signature.
    entities: list[dict[str, str]] = dspy.OutputField(desc="a list of entities and their metadata")

# Predictor that fills ExtractInfo's output fields from the input text.
module = dspy.Predict(ExtractInfo)

# Parenthesised implicit concatenation. The trailing space after "today." keeps
# the two sentences from being fused together as "today.The CEO".
text = (
    "Apple Inc. announced its latest iPhone 14 today. "
    "The CEO, Tim Cook, highlighted its new features in a press release."
)
response = module(text=text)

# Print the three extracted output fields, one per line.
print(response.title)
print(response.headings)
print(response.entities)

Apple Announces iPhone 14
['Apple Inc.', 'iPhone 14', 'Tim Cook']
[{'name': 'Apple Inc.', 'type': 'Organization'}, {'name': 'iPhone 14', 'type': 'Product'}, {'name': 'Tim Cook', 'type': 'Person'}]


In [8]:
# Example 5: Agents
def evaluate_math(expression: str):
    # Execute `expression` in a fresh DSPy PythonInterpreter and return the result.
    # The interpreter is used as a context manager so that it is shut down as soon
    # as the expression has run, instead of one interpreter being left behind per call.
    # NOTE(review): documented with comments rather than a docstring on purpose —
    # DSPy appears to use a tool function's docstring as its description in the
    # agent prompt; confirm before adding one.
    with dspy.PythonInterpreter({}) as interpreter:
        return interpreter.execute(expression)

def search_wikipedia(query: str) -> list[str]:
    # Return the 'text' field of the top three passages retrieved for `query`.
    # The return annotation matches the other definitions of this helper in the notebook.
    # NOTE(review): documented with comments rather than a docstring on purpose —
    # DSPy appears to use a tool function's docstring as its description in the
    # agent prompt; confirm before adding one.
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    passages = retriever(query, k=3)
    return [passage['text'] for passage in passages]

# Agent that is given both tools and must produce a float-typed answer.
react = dspy.ReAct(
    "question -> answer: float",
    tools=[evaluate_math, search_wikipedia],
)

pred = react(
    question="What is 9362158 divided by the year of birth of David Gregory of Kinnairdy castle?",
)
print(pred.answer)

5761.33


In [9]:
# Example 6: Multi-stage pipeline
# Signature for the first pipeline stage: topic -> title, sections, per-section subheadings.
# NOTE(review): the class docstring and the `desc` string are presumably part of
# the prompt sent to the LM — treat edits to them as prompt changes.
class Outline(dspy.Signature):
    """Outline a thorough overview of a topic."""

    # Input: the subject to outline.
    topic: str = dspy.InputField()
    # Output: read by DraftArticle.forward and reused as the topic of every section draft.
    title: str = dspy.OutputField()
    # Output: list of section strings; DraftArticle.forward does not read this field.
    sections: list[str] = dspy.OutputField()
    # Output: DraftArticle.forward iterates this mapping's items to drive the drafting loop.
    section_subheadings: dict[str, list[str]] = dspy.OutputField(desc="mapping from section headings to subheadings")

# Signature for the second pipeline stage: drafts one section from its heading and subheadings.
# NOTE(review): the class docstring and the `desc` string are presumably part of
# the prompt sent to the LM — treat edits to them as prompt changes.
class DraftSection(dspy.Signature):
    """Draft a top-level section of an article."""

    # Input: DraftArticle.forward passes the outline's title here, not the raw topic.
    topic: str = dspy.InputField()
    # Input: DraftArticle.forward passes the heading already prefixed with "## ".
    section_heading: str = dspy.InputField()
    # Input: DraftArticle.forward passes each subheading already prefixed with "### ".
    section_subheadings: list[str] = dspy.InputField()
    # Output: the drafted section text.
    content: str = dspy.OutputField(desc="markdown-formatted section")

class DraftArticle(dspy.Module):
    """Two-stage article writer: outline the topic, then draft each section.

    ``forward`` returns a ``dspy.Prediction`` carrying ``title`` (taken from the
    outline) and ``sections`` (one drafted content string per outlined section).
    """

    def __init__(self):
        # Run dspy.Module's own initialisation before attaching the sub-modules.
        super().__init__()
        self.build_outline = dspy.ChainOfThought(Outline)
        self.draft_section = dspy.ChainOfThought(DraftSection)

    def forward(self, topic):
        """Draft an article about ``topic``.

        Args:
            topic: Subject passed to the outline stage.

        Returns:
            ``dspy.Prediction(title=..., sections=[...])`` where each entry of
            ``sections`` is the ``content`` output of one ``DraftSection`` call.
        """
        outline = self.build_outline(topic=topic)
        sections = []
        # Drafting is driven by the heading -> subheadings mapping: one call per heading.
        for heading, subheadings in outline.section_subheadings.items():
            # Headings are handed to the drafting stage pre-formatted as markdown.
            section_heading = f"## {heading}"
            markdown_subheadings = [f"### {subheading}" for subheading in subheadings]
            # The outline's title, not the raw topic, is used as the section's topic.
            draft = self.draft_section(
                topic=outline.title,
                section_heading=section_heading,
                section_subheadings=markdown_subheadings,
            )
            sections.append(draft.content)
        return dspy.Prediction(title=outline.title, sections=sections)

# Build the pipeline, then run both stages end-to-end for a single topic.
draft_article = DraftArticle()
article = draft_article(
    topic="World Cup 2002",
)

In [10]:
from IPython.display import display, Markdown
# Render every drafted section as Markdown, in the order it was generated.
for section_markdown in article.sections:
    display(Markdown(section_markdown))

## Introduction

The 2002 FIFA World Cup was a pivotal moment in the history of international football, marking the first time the tournament was hosted in Asia and co-organized by Japan and South Korea. Held from May 31 to June 30, 2002, it featured 32 teams competing for global glory, captivating audiences worldwide with thrilling matches, cultural exchanges, and unexpected outcomes. This event not only showcased the sport's universal appeal but also highlighted advancements in technology, stadium infrastructure, and fan engagement, setting the stage for future tournaments.

### Historical Context

The FIFA World Cup traces its roots back to 1930, when the first edition was held in Uruguay, and has since grown into the most-watched sporting event on the planet. By 2002, the tournament had expanded significantly, with the 1998 edition in France drawing record viewership. The 2002 World Cup was particularly noteworthy as it broke new ground by being hosted in Asia, fulfilling FIFA's goal of global inclusivity. This shift came after decades of European and South American dominance, reflecting the sport's rising popularity in other regions and addressing geopolitical efforts to promote unity through sports.

### Tournament Format and Significance

The 2002 World Cup followed a familiar format: 32 teams were divided into eight groups of four, with the top two teams from each group advancing to a single-elimination knockout stage. This structure, which included group-stage matches, round of 16, quarterfinals, semifinals, and the final, ensured high-stakes drama and strategic gameplay. The tournament's significance extended beyond the pitch; it was a symbol of international cooperation, as co-hosts Japan and South Korea demonstrated successful collaboration despite historical tensions. Additionally, upsets like South Korea's semi-final run and the emergence of stars like Ronaldo underscored the event's role in inspiring new generations and elevating football's global stature.

## Host Countries and Venues

### Host Nations Overview
The 2002 FIFA World Cup marked a historic milestone as the first tournament co-hosted by two nations: Japan and South Korea. This decision, announced in 1996, was driven by FIFA's aim to promote football across Asia and foster regional unity. Japan, with its advanced infrastructure and growing football enthusiasm, hosted 10 matches, including the opening game in Sapporo. South Korea, equally enthusiastic, managed 10 matches as well, with the final held in Yokohama, Japan. The co-hosting arrangement highlighted cultural exchanges, with matches spread across both countries to showcase their distinct heritages. This setup not only boosted tourism and economic growth in both nations but also symbolized peace and cooperation on the Korean Peninsula, despite ongoing geopolitical tensions.

### Key Stadiums and Logistics
The 2002 World Cup featured 20 state-of-the-art stadiums across Japan and South Korea, each designed to accommodate large crowds and enhance the spectator experience. In Japan, iconic venues included the Yokohama International Stadium (site of the final match) and the Saitama Stadium, known for its modern architecture and capacity of over 60,000. South Korea's highlights were the Seoul World Cup Stadium and the Daegu Stadium, which hosted key group-stage and knockout games. Logistics were meticulously planned to handle the influx of fans, with high-speed trains and efficient public transport systems connecting venues. FIFA coordinated seamless operations, including security measures and bilingual signage, to ensure smooth travel between host cities. This infrastructure not only facilitated the tournament's success but also left a lasting legacy, with many stadiums continuing to host major events today.

## Participating Teams and Qualification

### Qualified Teams by Continent

The 2002 FIFA World Cup, co-hosted by South Korea and Japan, featured 32 qualified teams from across the globe, determined through continental qualification processes. Teams were allocated based on their respective football confederations, ensuring representation from various regions.

- **UEFA (Europe)**: With 15 slots, Europe had the largest contingent. Notable qualifiers included powerhouses like Germany, Italy, England, Spain, and France (the defending champions). Other teams from this region were Portugal, Sweden, Denmark, Turkey, Poland, Belgium, Russia, Slovenia, and Croatia. The European qualification was highly competitive, with group stages and playoffs eliminating many strong sides.

- **CONMEBOL (South America)**: South America secured 5 spots, dominated by traditional giants. Brazil, Argentina, Ecuador, Paraguay, and Uruguay qualified. Brazil's qualification was straightforward, but Ecuador's emergence as a first-time qualifier was a significant highlight.

- **CAF (Africa)**: Africa had 5 representatives: Senegal, South Africa, Tunisia, Cameroon, and Nigeria. This was a breakthrough for African football, with Senegal making their debut and eventually reaching the quarterfinals.

- **AFC (Asia)**: As co-hosts, South Korea and Japan automatically qualified, taking 2 spots. Saudi Arabia and China also joined, making a total of 4 teams from Asia.

- **CONCACAF (North America, Central America, and the Caribbean)**: This region contributed 3 teams: Mexico, United States, and Costa Rica. The US team's qualification was marked by their strong performance in the regional tournament.

- **OFC (Oceania)**: Australia qualified but was later eliminated in an intercontinental playoff, with no team from Oceania ultimately participating.

- **CONMEBOL vs. OFC Playoff**: For the final spot, Uruguay from South America defeated Australia in a playoff, securing their place.

### Qualification Process and Highlights

The qualification process for the 2002 FIFA World Cup began in 1999 and involved over 190 teams competing across their confederations. Each confederation conducted its own tournaments, typically consisting of group stages, knockout rounds, and sometimes playoffs, to determine the qualifiers. The process emphasized regional competition while allowing for global representation.

Key highlights included:
- **Surprising Qualifiers**: Senegal's qualification as first-timers from Africa was a major story, achieved through a dramatic campaign led by coach Bruno Metsu. Similarly, Turkey's strong performance in European qualifiers set the stage for their impressive tournament run.
- **Upsets and Exclusions**: High-profile teams like France (the reigning champions) had to fight hard, but others like Scotland and Wales failed to qualify, adding drama. In Asia, China's qualification was historic as their first-ever World Cup appearance.
- **Playoff Drama**: The intercontinental playoff between Uruguay and Australia was a tense affair, with Uruguay winning 3-2 on aggregate, showcasing the high stakes of these matches.
- **Overall Structure**: FIFA's allocation ensured fairness, with slots distributed as follows: 14 for UEFA, 5 for CONMEBOL, 5 for CAF, 4 for AFC (including hosts), 3 for CONCACAF, and 1 via playoff. This process not only selected the teams but also built excitement leading up to the event, highlighting emerging talents and underdog stories.

This section provides a balanced overview, focusing on facts while maintaining engagement for readers interested in the tournament's buildup.

## Group Stage

The group stage of the 2002 FIFA World Cup, held in South Korea and Japan from May 31 to June 14, featured 32 teams divided into eight groups (A through H). Each group consisted of four teams competing in a round-robin format, with the top two teams from each group advancing to the knockout stage. This phase was marked by surprising results, underdog victories, and high-stakes drama that set the tone for the rest of the tournament.

### Group Compositions

The groups were carefully balanced to include a mix of traditional powerhouses, emerging nations, and host countries. Here's a breakdown of the teams in each group:

- **Group A**: Senegal, Uruguay, Denmark, France  
- **Group B**: Spain, Slovenia, South Africa, Paraguay  
- **Group C**: Brazil, Turkey, China PR, Costa Rica  
- **Group D**: South Korea, Poland, United States, Portugal  
- **Group E**: Germany, Saudi Arabia, Republic of Ireland, Cameroon  
- **Group F**: Argentina, Nigeria, England, Sweden  
- **Group G**: Italy, Ecuador, Croatia, Mexico  
- **Group H**: Japan, Belgium, Russia, Tunisia  

This composition led to intriguing matchups, with defending champions France in Group A facing early challenges, and host nations South Korea and Japan generating significant excitement.

### Notable Matches and Results

The group stage delivered several memorable moments, including upsets and standout performances. One of the biggest shocks was Senegal's 1-0 victory over France in the opening match, marking a stunning upset against the reigning world champions and ultimately leading to France's group-stage exit. Another highlight was South Korea's 2-0 win over Poland, boosting the hosts' campaign and energizing their fans.

Key results included:
- Brazil's dominant 2-0 win over China PR in Group C, showcasing their attacking prowess with goals from Ronaldo and Rivaldo.
- The United States' 3-2 comeback victory against Portugal in Group D, a thrilling match that saw the U.S. advance to the knockout stage.
- England's 1-0 win over Argentina in Group F, a revenge match from the 1998 World Cup, secured by a David Beckham penalty.

Overall, the group stage saw 15 goals in a single day of matches on June 12, and teams like Turkey and South Korea emerged as dark horses, setting up exciting knockout encounters.

## Knockout Stage

The knockout stage of the 2002 FIFA World Cup, held in South Korea and Japan, marked the intense elimination phase where 16 teams competed for glory after the group stage. This stage began on June 15, 2002, and featured high-stakes matches that showcased dramatic upsets, stellar performances, and unforgettable moments. Spanning from the Round of 16 to the final, it highlighted the tournament's co-hosts' surprising success and established powerhouses' dominance. Below is a breakdown of the key phases.

### Round of 16 Summary

The Round of 16 kicked off the knockout excitement, with eight matches determining which teams advanced. This round saw a mix of expected victories and early shocks, setting the tone for the rest of the tournament.

- **Germany 1-0 Paraguay**: Germany edged out Paraguay in a tightly contested match, thanks to a goal from Oliver Neuville, demonstrating their defensive resilience.
- **United States 2-3 Germany**: The US put up a valiant fight but fell to Germany in extra time, with goals from Brian McBride and Landon Donovan nearly causing an upset.
- **South Korea 2-1 Italy**: One of the biggest upsets in World Cup history, co-host South Korea advanced after a controversial extra-time win, with goals from Seol Ki-hyeon and Ahn Jung-hwan, eliminating the favored Italians.
- **Spain 1-0 Ireland**: Spain progressed via a penalty shootout after a 1-1 draw, with Gaizka Mendieta's spot-kick proving decisive.
- **Brazil 2-0 Belgium**: Brazil's Ronaldo and Rivaldo shone, securing a comfortable win and maintaining their status as favorites.
- **England 3-0 Denmark**: England dominated with goals from Rio Ferdinand, Steven Gerrard, and Michael Owen, showcasing their attacking prowess.
- **Turkey 3-0 Japan**: Turkey advanced with a convincing victory, highlighted by Hakan Şükür's quick goal.
- **South Korea 5-0 Poland**: In another match, South Korea overwhelmed Poland, but the full Round of 16 included the above key fixtures.

This round eliminated half the field and introduced thrilling narratives, such as the co-hosts' momentum.

### Quarterfinals, Semifinals, and Key Upsets

The quarterfinals and semifinals intensified the competition, with teams battling for a spot in the final. This phase featured dominant performances, defensive masterclasses, and more upsets that defined the tournament's legacy.

- **Quarterfinals Highlights**: Brazil defeated England 2-1 in a classic encounter, with Ronaldinho's long-range free-kick standing out. Germany beat the United States 1-0, while South Korea stunned Spain in a penalty shootout after a 0-0 draw, marking another major upset. Turkey edged out Senegal 1-0.
  
- **Semifinals Action**: Brazil continued their strong run, defeating Turkey 1-0 with a goal from Ronaldo. South Korea faced Germany in a thrilling semifinal, losing 1-0 to a late goal from Oliver Bierhoff, ending their fairy-tale campaign.

Key upsets included South Korea's elimination of Italy and Spain, which propelled them to the semifinals as the first Asian team to reach that stage. These moments not only shocked the football world but also highlighted the growing competitiveness of underdog teams. The knockout stage ultimately culminated in Brazil's victory in the final, solidifying their status as five-time champions.

## Final and Champion

### The Final Match Details

The 2002 FIFA World Cup final took place on June 30, 2002, at the International Stadium in Yokohama, Japan. This highly anticipated match featured Brazil, a football powerhouse seeking its fifth World Cup title, against Germany, a resilient European side aiming for its fourth championship. The game was a showcase of tactical prowess and individual brilliance, with Brazil dominating proceedings. The match ended in a 2-0 victory for Brazil, thanks to two goals from star forward Ronaldo in the 67th and 79th minutes. Despite Germany's strong defensive efforts and a few close attempts, Brazil's fluid attacking play and clinical finishing secured their place in history. The event drew a global television audience of over 1 billion viewers, highlighting the World Cup's immense popularity.

### Winning Team and Awards

Brazil emerged as the champion of the 2002 FIFA World Cup, clinching their record fifth title and solidifying their status as one of the most successful teams in the tournament's history. The final score against Germany was 2-0, with Ronaldo's brace proving decisive. In addition to the team trophy, several individual awards were handed out to recognize outstanding performances. Ronaldo was named the tournament's top scorer with eight goals, earning him the Golden Boot and the Golden Ball as the best player overall. Other notable awards included the Golden Glove for Germany's Oliver Kahn, who was recognized for his exceptional goalkeeping despite the loss, and the FIFA Fair Play Award, which went to South Korea for their sportsmanship throughout the competition. Brazil's victory marked a triumphant end to the first World Cup co-hosted by Asia.

## Key Moments and Controversies

The 2002 FIFA World Cup, held in South Korea and Japan, was filled with thrilling moments that captivated global audiences, as well as several controversies that sparked debates among fans and experts. This section explores the standout highlights and the incidents that left a lasting impact on the tournament's legacy.

### Memorable Goals and Performances

The 2002 World Cup featured some of the most iconic goals and performances in football history. Brazil's Ronaldo, often called "O Fenômeno," delivered a stellar performance, scoring twice in the final against Germany to secure Brazil's fifth World Cup title with a 2-0 victory. His resilience after previous injuries made this a triumphant comeback story. Another memorable moment was Rivaldo's audacious goal against Turkey in the group stage, where he chested the ball down and volleyed it into the net, showcasing Brazil's flair. Japan's Hidetoshi Nakata and Senegal's surprise run, led by players like El Hadji Diouf, also stood out, with Turkey's third-place finish highlighted by Ümit Davala's contributions, making this tournament a celebration of underdogs and stars alike.

### Controversial Decisions and Incidents

Controversies marred several matches, drawing scrutiny to officiating and player conduct. One of the most infamous incidents involved Brazil's Rivaldo during the group stage match against Turkey; after being hit by a ball, he dramatically fell to the ground, leading to a red card for Turkey's Hakan Ünsal and widespread criticism for simulation. Refereeing decisions also came under fire, such as in the England vs. Argentina round-of-16 match, where debatable calls, including a disallowed goal, influenced the outcome and fueled accusations of bias. These events raised questions about fair play and the use of technology in football, underscoring the tournament's dramatic edge.

## Legacy and Impact

The 2002 FIFA World Cup, co-hosted by South Korea and Japan, left an indelible mark on global football and beyond. As the first World Cup held in Asia, it not only showcased the sport's universal appeal but also catalyzed significant changes in infrastructure, participation, and international perceptions. This section explores the tournament's enduring influence, particularly in Asia, and its broader cultural and economic ramifications.

### Influence on Asian Football

The 2002 World Cup profoundly transformed football in Asia, elevating the continent's profile in the global game. Hosting the event led to substantial investments in sports facilities, with new stadiums and training centers built in South Korea and Japan that continue to support domestic leagues and youth development programs. South Korea's unexpected semi-final run, led by stars like Hong Myung-bo, inspired a surge in youth participation, with football enrollment in schools skyrocketing and contributing to the country's rise in international rankings. Japan also benefited, as the tournament accelerated professionalization of the J.League, drawing more foreign talent and fostering a competitive environment. Overall, the event broke down stereotypes about Asian football, encouraging greater investment from Asian Football Confederation (AFC) nations and paving the way for future successes, such as Asia's increased representation in subsequent World Cups.

### Cultural and Economic Effects

Beyond the pitch, the 2002 FIFA World Cup had far-reaching cultural and economic impacts that extended well into the 21st century. Culturally, it promoted East Asian unity and global exchange, with the co-hosting model between South Korea and Japan symbolizing reconciliation and fostering mutual understanding through shared events and fan interactions. The tournament popularized football as a cultural phenomenon in Asia, influencing media, music, and art, while also introducing Western football traditions to new audiences. Economically, it generated billions in revenue through tourism, broadcasting rights, and merchandise sales, boosting local economies in host cities. Infrastructure projects, such as improved transportation and hotels, created lasting jobs and stimulated growth, with estimates suggesting a multi-billion-dollar economic uplift. However, it also highlighted challenges like rising costs and environmental concerns, underscoring the need for sustainable event planning in future global tournaments.

# Getting Started III: Optimizing the LM prompts or weights in DSPy programs

In [15]:
# Optimizing prompts for a ReAct agent
import dspy
from dspy.datasets import HotPotQA

# dspy.configure(lm=dspy.LM('openai/gpt-4o-mini'))

def search_wikipedia(query: str) -> list[str]:
    # Return the 'text' field of the top three passages retrieved for `query`.
    # NOTE(review): documented with comments rather than a docstring on purpose —
    # DSPy appears to use a tool function's docstring as its description in the
    # agent prompt; confirm before adding one.
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    passages = retriever(query, k=3)
    return [passage['text'] for passage in passages]

# 500 HotPotQA training examples; only `question` is marked as an input field.
trainset = [
    example.with_inputs('question')
    for example in HotPotQA(train_seed=2024, train_size=500).train
]
react = dspy.ReAct("question -> answer", tools=[search_wikipedia])

# "light" auto-run preset, scored by exact match on the answer.
tp = dspy.MIPROv2(
    metric=dspy.evaluate.answer_exact_match,
    auto="light",
    num_threads=24,
)
optimized_react = tp.compile(
    react,
    trainset=trainset,
    requires_permission_to_run=False,
)

2025/06/24 17:56:22 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 20
minibatch: True
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 100

2025/06/24 17:56:22 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/24 17:56:22 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/06/24 17:56:22 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


 11%|█         | 11/100 [02:26<19:43, 13.30s/it]


Bootstrapped 4 full traces after 11 examples for up to 1 rounds, amounting to 11 attempts.
Bootstrapping set 4/6


  7%|▋         | 7/100 [02:07<28:17, 18.26s/it]  


Bootstrapped 3 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.
Bootstrapping set 5/6


  5%|▌         | 5/100 [01:36<30:42, 19.40s/it]


Bootstrapped 1 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Bootstrapping set 6/6


  2%|▏         | 2/100 [00:19<16:12,  9.92s/it]
2025/06/24 18:02:53 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/06/24 18:02:53 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 1 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2025/06/24 18:04:04 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/06/24 18:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/06/24 18:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given the fields `question`, produce the fields `answer`.

You are an Agent. In each episode, you will be given the fields `question` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `answer`.

To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.
After each tool call, you receive a resulting observation, which gets appended to your trajectory.

When writing next_thought, you may reason about the current situation and plan for future steps.
When selecting the next_tool_name and its next_tool_args, the tool must be one of:

(1) search_wikipedia. It ta

Average Metric: 42.00 / 100 (42.0%): 100%|██████████| 100/100 [01:56<00:00,  1.16s/it]

2025/06/24 18:10:14 INFO dspy.evaluate.evaluate: Average Metric: 42 / 100 (42.0%)
2025/06/24 18:10:14 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 42.0

2025/06/24 18:10:14 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 25 - Minibatch ==



Average Metric: 11.00 / 35 (31.4%): 100%|██████████| 35/35 [00:43<00:00,  1.25s/it]

2025/06/24 18:10:58 INFO dspy.evaluate.evaluate: Average Metric: 11 / 35 (31.4%)
2025/06/24 18:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 31.43 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/06/24 18:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43]
2025/06/24 18:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0]
2025/06/24 18:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 42.0


2025/06/24 18:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 25 - Minibatch ==



Average Metric: 13.00 / 35 (37.1%): 100%|██████████| 35/35 [00:46<00:00,  1.34s/it]

2025/06/24 18:11:46 INFO dspy.evaluate.evaluate: Average Metric: 13 / 35 (37.1%)
2025/06/24 18:11:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 37.14 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 2'].
2025/06/24 18:11:46 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14]
2025/06/24 18:11:46 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0]
2025/06/24 18:11:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 42.0


2025/06/24 18:11:46 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 25 - Minibatch ==



Average Metric: 20.00 / 35 (57.1%): 100%|██████████| 35/35 [00:56<00:00,  1.62s/it]

2025/06/24 18:12:43 INFO dspy.evaluate.evaluate: Average Metric: 20 / 35 (57.1%)
2025/06/24 18:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 57.14 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/06/24 18:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14]
2025/06/24 18:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0]
2025/06/24 18:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 42.0


2025/06/24 18:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 25 - Minibatch ==



Average Metric: 21.00 / 35 (60.0%): 100%|██████████| 35/35 [00:38<00:00,  1.09s/it]

2025/06/24 18:13:21 INFO dspy.evaluate.evaluate: Average Metric: 21 / 35 (60.0%)
2025/06/24 18:13:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/24 18:13:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0]
2025/06/24 18:13:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0]
2025/06/24 18:13:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 42.0


2025/06/24 18:13:21 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 25 - Minibatch ==



Average Metric: 19.00 / 35 (54.3%): 100%|██████████| 35/35 [00:44<00:00,  1.26s/it]

2025/06/24 18:14:06 INFO dspy.evaluate.evaluate: Average Metric: 19 / 35 (54.3%)
2025/06/24 18:14:06 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 54.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 2'].
2025/06/24 18:14:06 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29]
2025/06/24 18:14:06 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0]
2025/06/24 18:14:06 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 42.0


2025/06/24 18:14:06 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 25 - Full Evaluation =====
2025/06/24 18:14:06 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 60.0) from minibatch trials...



Average Metric: 59.00 / 100 (59.0%): 100%|██████████| 100/100 [01:06<00:00,  1.50it/s]

2025/06/24 18:15:13 INFO dspy.evaluate.evaluate: Average Metric: 59 / 100 (59.0%)
2025/06/24 18:15:13 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 59.0
2025/06/24 18:15:13 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0]
2025/06/24 18:15:13 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 59.0
2025/06/24 18:15:13 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/24 18:15:13 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 25 - Minibatch ==



Average Metric: 15.00 / 35 (42.9%): 100%|██████████| 35/35 [00:39<00:00,  1.12s/it]

2025/06/24 18:15:53 INFO dspy.evaluate.evaluate: Average Metric: 15 / 35 (42.9%)
2025/06/24 18:15:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/24 18:15:53 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86]
2025/06/24 18:15:53 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0]
2025/06/24 18:15:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 59.0


2025/06/24 18:15:53 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 25 - Minibatch ==



Average Metric: 23.00 / 35 (65.7%): 100%|██████████| 35/35 [00:43<00:00,  1.25s/it]

2025/06/24 18:16:37 INFO dspy.evaluate.evaluate: Average Metric: 23 / 35 (65.7%)





2025/06/24 18:16:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 65.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:16:37 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71]
2025/06/24 18:16:37 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0]
2025/06/24 18:16:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 59.0


2025/06/24 18:16:37 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 25 - Minibatch ==


Average Metric: 15.00 / 35 (42.9%): 100%|██████████| 35/35 [00:48<00:00,  1.38s/it]

2025/06/24 18:17:26 INFO dspy.evaluate.evaluate: Average Metric: 15 / 35 (42.9%)
2025/06/24 18:17:26 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 42.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/24 18:17:26 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86]
2025/06/24 18:17:26 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0]
2025/06/24 18:17:26 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 59.0


2025/06/24 18:17:26 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 11 / 25 - Minibatch ==



Average Metric: 18.00 / 35 (51.4%): 100%|██████████| 35/35 [00:57<00:00,  1.64s/it]

2025/06/24 18:18:24 INFO dspy.evaluate.evaluate: Average Metric: 18 / 35 (51.4%)
2025/06/24 18:18:24 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 51.43 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:18:24 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43]
2025/06/24 18:18:24 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0]
2025/06/24 18:18:24 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 59.0


2025/06/24 18:18:24 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 25 - Minibatch ==



Average Metric: 20.00 / 35 (57.1%): 100%|██████████| 35/35 [00:39<00:00,  1.12s/it]

2025/06/24 18:19:04 INFO dspy.evaluate.evaluate: Average Metric: 20 / 35 (57.1%)
2025/06/24 18:19:04 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 57.14 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/24 18:19:04 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14]
2025/06/24 18:19:04 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0]
2025/06/24 18:19:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 59.0


2025/06/24 18:19:04 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 25 - Full Evaluation =====
2025/06/24 18:19:04 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 65.71) from minibatch trials...



Average Metric: 60.00 / 100 (60.0%): 100%|██████████| 100/100 [01:12<00:00,  1.39it/s]

2025/06/24 18:20:17 INFO dspy.evaluate.evaluate: Average Metric: 60 / 100 (60.0%)
2025/06/24 18:20:17 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 60.0





2025/06/24 18:20:17 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0]
2025/06/24 18:20:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0
2025/06/24 18:20:17 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/24 18:20:17 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 14 / 25 - Minibatch ==


Average Metric: 21.00 / 35 (60.0%): 100%|██████████| 35/35 [00:00<00:00, 412.02it/s]

2025/06/24 18:20:19 INFO dspy.evaluate.evaluate: Average Metric: 21 / 35 (60.0%)





2025/06/24 18:20:19 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/24 18:20:19 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0]
2025/06/24 18:20:19 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0]
2025/06/24 18:20:19 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:20:19 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 15 / 25 - Minibatch ==


Average Metric: 23.00 / 35 (65.7%): 100%|██████████| 35/35 [00:10<00:00,  3.32it/s]

2025/06/24 18:20:31 INFO dspy.evaluate.evaluate: Average Metric: 23 / 35 (65.7%)
2025/06/24 18:20:32 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 65.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:20:32 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71]
2025/06/24 18:20:32 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0]
2025/06/24 18:20:32 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:20:32 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 16 / 25 - Minibatch ==



Average Metric: 21.00 / 35 (60.0%): 100%|██████████| 35/35 [00:08<00:00,  4.11it/s]

2025/06/24 18:20:42 INFO dspy.evaluate.evaluate: Average Metric: 21 / 35 (60.0%)





2025/06/24 18:20:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 5'].
2025/06/24 18:20:43 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0]
2025/06/24 18:20:43 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0]
2025/06/24 18:20:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:20:43 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 17 / 25 - Minibatch ==


Average Metric: 18.00 / 35 (51.4%): 100%|██████████| 35/35 [00:11<00:00,  3.08it/s]

2025/06/24 18:20:56 INFO dspy.evaluate.evaluate: Average Metric: 18 / 35 (51.4%)
2025/06/24 18:20:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 51.43 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:20:56 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43]
2025/06/24 18:20:56 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0]
2025/06/24 18:20:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:20:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 18 / 25 - Minibatch ==



Average Metric: 22.00 / 35 (62.9%): 100%|██████████| 35/35 [00:35<00:00,  1.00s/it]

2025/06/24 18:21:31 INFO dspy.evaluate.evaluate: Average Metric: 22 / 35 (62.9%)
2025/06/24 18:21:31 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 62.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:21:31 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43, 62.86]
2025/06/24 18:21:31 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0]
2025/06/24 18:21:32 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:21:32 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 25 - Full Evaluation =====
2025/06/24 18:21:32 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 62.86) from minibatch trials...



Average Metric: 60.00 / 100 (60.0%): 100%|██████████| 100/100 [01:09<00:00,  1.45it/s]

2025/06/24 18:22:42 INFO dspy.evaluate.evaluate: Average Metric: 60 / 100 (60.0%)
2025/06/24 18:22:42 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0]
2025/06/24 18:22:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0
2025/06/24 18:22:42 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/24 18:22:42 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 20 / 25 - Minibatch ==



Average Metric: 22.00 / 35 (62.9%): 100%|██████████| 35/35 [00:46<00:00,  1.32s/it]

2025/06/24 18:23:29 INFO dspy.evaluate.evaluate: Average Metric: 22 / 35 (62.9%)
2025/06/24 18:23:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 62.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 1', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:23:29 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43, 62.86, 62.86]
2025/06/24 18:23:29 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0]
2025/06/24 18:23:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:23:29 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 21 / 25 - Minibatch ==



Average Metric: 20.00 / 35 (57.1%): 100%|██████████| 35/35 [00:08<00:00,  4.18it/s]

2025/06/24 18:23:39 INFO dspy.evaluate.evaluate: Average Metric: 20 / 35 (57.1%)
2025/06/24 18:23:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 57.14 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 4'].
2025/06/24 18:23:39 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43, 62.86, 62.86, 57.14]
2025/06/24 18:23:39 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0]
2025/06/24 18:23:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:23:39 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 22 / 25 - Minibatch ==



Average Metric: 23.00 / 35 (65.7%): 100%|██████████| 35/35 [00:12<00:00,  2.89it/s]

2025/06/24 18:23:52 INFO dspy.evaluate.evaluate: Average Metric: 23 / 35 (65.7%)
2025/06/24 18:23:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 65.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 5'].
2025/06/24 18:23:53 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43, 62.86, 62.86, 57.14, 65.71]
2025/06/24 18:23:53 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0]
2025/06/24 18:23:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:23:53 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 23 / 25 - Minibatch ==



Average Metric: 18.00 / 35 (51.4%): 100%|██████████| 35/35 [00:08<00:00,  4.29it/s] 

2025/06/24 18:24:02 INFO dspy.evaluate.evaluate: Average Metric: 18 / 35 (51.4%)
2025/06/24 18:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 51.43 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 5'].
2025/06/24 18:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43, 62.86, 62.86, 57.14, 65.71, 51.43]
2025/06/24 18:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0]
2025/06/24 18:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 24 / 25 - Minibatch ==



Average Metric: 24.00 / 35 (68.6%): 100%|██████████| 35/35 [00:00<00:00, 511.69it/s]

2025/06/24 18:24:05 INFO dspy.evaluate.evaluate: Average Metric: 24 / 35 (68.6%)





2025/06/24 18:24:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 1'].
2025/06/24 18:24:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [31.43, 37.14, 57.14, 60.0, 54.29, 42.86, 65.71, 42.86, 51.43, 57.14, 60.0, 65.71, 60.0, 51.43, 62.86, 62.86, 57.14, 65.71, 51.43, 68.57]
2025/06/24 18:24:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0]
2025/06/24 18:24:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 60.0


2025/06/24 18:24:05 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 25 / 25 - Full Evaluation =====
2025/06/24 18:24:05 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 62.86) from minibatch trials...


Average Metric: 62.00 / 100 (62.0%): 100%|██████████| 100/100 [01:06<00:00,  1.50it/s]

2025/06/24 18:25:13 INFO dspy.evaluate.evaluate: Average Metric: 62 / 100 (62.0%)
2025/06/24 18:25:13 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 62.0
2025/06/24 18:25:13 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [42.0, 59.0, 60.0, 60.0, 62.0]
2025/06/24 18:25:13 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 62.0
2025/06/24 18:25:13 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/24 18:25:13 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 62.0!





In [16]:
optimized_react

react = Predict(StringSignature(question, trajectory -> next_thought, next_tool_name, next_tool_args
    instructions="You are an intelligent Agent tasked with answering user questions that may require external knowledge. Given the input fields `question` (the user's query) and `trajectory` (a history of your previous thoughts, tool calls, and observations), your goal is to generate the outputs: `next_thought` (a clear, step-by-step reasoning about the current state and your plan for the next action), `next_tool_name` (the name of the tool to use next, either 'search_wikipedia' or 'finish'), and `next_tool_args` (a dictionary in JSON format specifying the arguments for the selected tool).\n\nAvailable tools are:\n(1) search_wikipedia: Use this to query Wikipedia for information. It requires a single argument: {'query': 'a string representing the search term'}.\n(2) finish: Use this when you have gathered all necessary information to answer the question, marking the task as complete. It