In [1]:
import dspy
from dspy.evaluate import Evaluate
from dspy.datasets.hotpotqa import HotPotQA
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from compiler.utils import load_api_key

import os

# Machine-specific locations. The defaults are the values this notebook was
# originally run with; set the environment variables to run it on another
# machine without editing the cell. An empty variable falls back to the default.
SECRETS_PATH = os.environ.get('LM_COMPILER_SECRETS') or '/mnt/ssd4/lm_compiler/secrets.toml'
COLBERT_URL = os.environ.get('COLBERT_URL') or 'http://192.168.1.18:8893/api/search'

# Project helper that reads the secrets file (presumably it exposes the OpenAI
# key to the client created below -- TODO confirm against compiler.utils).
load_api_key(SECRETS_PATH)

# Language model and retrieval model registered as the DSPy defaults.
gpt4o_mini = dspy.OpenAI('gpt-4o-mini', max_tokens=1000)
colbert = dspy.ColBERTv2(url=COLBERT_URL)
dspy.configure(lm=gpt4o_mini, rm=colbert)

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sample HotPotQA: 200 training and 50 dev examples, no test split.
dataset = HotPotQA(train_seed=1, train_size=200, eval_seed=2023, dev_size=50, test_size=0)


def _question_only(examples):
    """Return `examples` as a list with 'question' marked as the input field."""
    return [example.with_inputs('question') for example in examples]


# The 200 training examples are split 150 / 50 into an optimization set and a
# validation set; the dev examples are kept separate for evaluation.
trainset = _question_only(dataset.train[:150])
valset = _question_only(dataset.train[150:200])
devset = _question_only(dataset.dev)

# Peek at one datapoint: it is just a question-answer pair.
trainset[0]

Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys={'question'})

In [3]:
from dsp.utils.utils import deduplicate

class BasicMH(dspy.Module):
    """Basic multi-hop question-answering program.

    For each hop, a chain-of-thought predictor turns the question plus the
    context gathered so far into a search query, the retriever fetches
    passages for that query, and the passages are merged (deduplicated) into
    the context. A final chain-of-thought predictor answers the question
    from the accumulated context.

    Args:
        passages_per_hop: Number of passages requested from the retriever on
            each hop (passed as `k` to `dspy.Retrieve`).
        max_hops: Number of query/retrieve rounds. Defaults to 2, the value
            that was previously hard-coded.
    """

    def __init__(self, passages_per_hop=3, max_hops=2):
        super().__init__()

        self.max_hops = max_hops
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        # One separate predictor per hop (generate_query[0], generate_query[1], ...).
        self.generate_query = [dspy.ChainOfThought("context, question -> search_query") for _ in range(max_hops)]
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer `question` and return the prediction with the retrieved `context` attached."""
        context = []

        # Iterate over the predictors themselves so the number of hops can
        # never drift out of sync with the list built in __init__.
        for query_generator in self.generate_query:
            search_query = query_generator(context=context, question=question).search_query
            passages = self.retrieve(search_query).passages
            # Later hops may retrieve passages already seen; keep each one once.
            context = deduplicate(context + passages)

        # Attach the context to the returned prediction so callers can inspect it.
        return self.generate_answer(context=context, question=question).copy(context=context)

In [4]:
# Baseline (unoptimized) program; retrieves 2 passages per hop instead of the default 3.
agent = BasicMH(passages_per_hop=2)

In [5]:
# Set up an evaluator over the whole devset (50 examples, since the dataset was
# loaded with dev_size=50), scored with exact match against the gold answer.
config = dict(num_threads=8, display_progress=True, display_table=5)
evaluate = Evaluate(devset=devset, metric=dspy.evaluate.answer_exact_match, **config)

# Baseline score of the unoptimized agent.
evaluate(agent)

 		You are using the client GPT3, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to migrate at
 		https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb
 		You are using the client GPT3, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to migrate at
 		https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb
 		You are using the client GPT3, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to

Unnamed: 0,question,example_answer,gold_titles,rationale,pred_answer,context,answer_exact_match
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Cangzhou', 'Qionghai'}","determine the locations of Cangzhou and Qionghai. Cangzhou is explicitly stated to be in eastern Hebei province, while Qionghai is identified as a county-level city...","No, Cangzhou is in Hebei province, while Qionghai is in Hainan province.","['Cangzhou | ""Cangzhou Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""""or metro"""")...",
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017 NHL Expansion Draft', '2017–18 Pittsburgh Penguins season'}",identify the organization responsible for conducting the draft. The context mentions that the 2017 NHL Expansion Draft was conducted by the National Hockey League (NHL)...,The National Hockey League (NHL) conducts the draft.,"['""Marc-André Fleury"" | ""route to a Stanley Cup championship victory, defeating the Nashville Predators in six games. The win was the third Stanley Cup of...",
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006–07 Detroit Red Wings season', 'Steve Yzerman'}",identify the retired professional ice hockey player mentioned in the context. We know that the question refers to a significant event in the history of...,Steve Yzerman,"['""Julien BriseBois"" | ""Julien BriseBois Julien BriseBois (born January 24, 1977) is a Canadian ice hockey executive. He is the general manager for the Tampa...",✔️ [True]
3,What river is near the Crichton Collegiate Church?,the River Tyne,"{'Crichton Castle', 'Crichton Collegiate Church'}","determine the river near the Crichton Collegiate Church. We know that Crichton Collegiate Church is located in Midlothian, Scotland, and from the context provided, we...",River Tyne,"['""Crichton Collegiate Church"" | ""Crichton Collegiate Church Crichton Collegiate Church is situated about south west of the hamlet of Crichton in Midlothian, Scotland. Crichton itself...",✔️ [True]
4,In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king?,King Alfred the Great,"{'Æthelweard (son of Alfred)', 'Ealhswith'}",determine the answer. We know from the context that Æthelweard was the younger son of King Alfred the Great and Ealhswith. Since the question specifically...,King Alfred the Great,"['""Æthelweard (son of Alfred)"" | ""Æthelweard (son of Alfred) Æthelweard (d. 920 or 922) was the younger son of King Alfred the Great and Ealhswith....",✔️ [True]


32.0

In [6]:
import dspy.evaluate
from dspy.teleprompt import BootstrapFewShotWithRandomSearch, MIPROv2
# NOTE(review): `prmopt_model` is a typo for "prompt_model"; the name is left
# as-is because it is a notebook-global that other cells may reference.
prmopt_model = dspy.OpenAI(model='gpt-4o-mini', max_tokens=1000)
# Re-point the default LM at the new client (same model and max_tokens as the
# client configured in the first cell).
dspy.settings.configure(lm=prmopt_model)
# NOTE(review): despite the variable name, this is a MIPROv2 optimizer, not
# BootstrapFewShotWithRandomSearch.
bootstrap_optimizer = MIPROv2(
    prompt_model=prmopt_model,
    metric=dspy.evaluate.answer_exact_match,
    num_candidates=20,  # the run log reports 20 bootstrapped demo sets
    num_threads=8,
)
# Expensive cell: bootstraps demos, proposes instructions, then runs 25
# minibatch trials with periodic full evaluations on `valset`.
# NOTE(review): `optimized_matplot` has nothing to do with plotting; it holds
# the compiled program and is referenced by that name in later cells.
optimized_matplot = bootstrap_optimizer.compile(
    agent,
    trainset=trainset,
    max_bootstrapped_demos=2,
    max_labeled_demos=2,
    num_trials=25,
    valset=valset,
    requires_permission_to_run=False,  # presumably skips the interactive cost confirmation -- TODO confirm
)

Beginning MIPROv2 optimization process...

==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
These will be used for as few-shot examples candidates for our program and for creating instructions.

Bootstrapping N=20 sets of demonstrations...
Bootstrapping set 1/20
Bootstrapping set 2/20
Bootstrapping set 3/20


  5%|▍         | 7/150 [00:42<14:28,  6.08s/it]


Bootstrapped 2 full traces after 8 examples in round 0.
Bootstrapping set 4/20


  3%|▎         | 4/150 [00:25<15:32,  6.39s/it]


Bootstrapped 2 full traces after 5 examples in round 0.
Bootstrapping set 5/20


  2%|▏         | 3/150 [00:16<13:43,  5.60s/it]


Bootstrapped 1 full traces after 4 examples in round 0.
Bootstrapping set 6/20


  1%|▏         | 2/150 [00:06<08:32,  3.46s/it]


Bootstrapped 1 full traces after 3 examples in round 0.
Bootstrapping set 7/20


  1%|          | 1/150 [00:05<14:29,  5.83s/it]


Bootstrapped 1 full traces after 2 examples in round 0.
Bootstrapping set 8/20


  3%|▎         | 4/150 [00:20<12:40,  5.21s/it]


Bootstrapped 1 full traces after 5 examples in round 0.
Bootstrapping set 9/20


  1%|▏         | 2/150 [00:10<13:22,  5.42s/it]


Bootstrapped 2 full traces after 3 examples in round 0.
Bootstrapping set 10/20


  3%|▎         | 4/150 [00:21<13:19,  5.48s/it]


Bootstrapped 1 full traces after 5 examples in round 0.
Bootstrapping set 11/20


  5%|▍         | 7/150 [00:34<11:52,  4.98s/it]


Bootstrapped 2 full traces after 8 examples in round 0.
Bootstrapping set 12/20


  1%|▏         | 2/150 [00:04<05:19,  2.16s/it]


Bootstrapped 1 full traces after 3 examples in round 0.
Bootstrapping set 13/20


  3%|▎         | 4/150 [00:20<12:18,  5.06s/it]


Bootstrapped 2 full traces after 5 examples in round 0.
Bootstrapping set 14/20


  2%|▏         | 3/150 [00:13<11:20,  4.63s/it]


Bootstrapped 1 full traces after 4 examples in round 0.
Bootstrapping set 15/20


  2%|▏         | 3/150 [00:19<16:19,  6.66s/it]


Bootstrapped 2 full traces after 4 examples in round 0.
Bootstrapping set 16/20


  3%|▎         | 5/150 [00:22<10:41,  4.42s/it]


Bootstrapped 2 full traces after 6 examples in round 0.
Bootstrapping set 17/20


  3%|▎         | 5/150 [00:19<09:24,  3.89s/it]


Bootstrapped 2 full traces after 6 examples in round 0.
Bootstrapping set 18/20


  3%|▎         | 4/150 [00:20<12:46,  5.25s/it]


Bootstrapped 1 full traces after 5 examples in round 0.
Bootstrapping set 19/20


  3%|▎         | 4/150 [00:17<10:55,  4.49s/it]


Bootstrapped 1 full traces after 5 examples in round 0.
Bootstrapping set 20/20


  4%|▍         | 6/150 [00:28<11:29,  4.79s/it]


Bootstrapped 2 full traces after 7 examples in round 0.

==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
In this step, by default we will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.

Proposing instructions...

Proposed Instructions for Predictor 0:

0: Given the fields `context`, `question`, produce the fields `search_query`.

1: Given the provided context and a specific question, explain the reasoning behind your thought process step-by-step to generate a relevant search query that will help in retrieving information to answer the question effectively. Focus on how the context informs the search query, and ensure clarity in your explanation to facilitate understanding of your reasoning.

2: Imagine you are a renowned pop culture expert tasked with answering critical questions for a live trivia competition. Your reputation is on the line as you need to quickly

Average Metric: 22 / 50  (44.0): 100%|██████████| 50/50 [00:34<00:00,  1.44it/s]


Default program score: 44.0

==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==
In this step, we will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination. Bayesian Optimization will be used for this search process.

== Minibatch Trial 1 / 25 ==


Average Metric: 9 / 25  (36.0): 100%|██████████| 25/25 [00:22<00:00,  1.12it/s]


Score: 36.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 12', 'Predictor 1: Few-Shot Set 6', 'Predictor 2: Instruction 8', 'Predictor 2: Few-Shot Set 4', 'Predictor 3: Instruction 3', 'Predictor 3: Few-Shot Set 13'].


== Minibatch Trial 2 / 25 ==


Average Metric: 13 / 25  (52.0): 100%|██████████| 25/25 [00:20<00:00,  1.22it/s]


Score: 52.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 9', 'Predictor 1: Few-Shot Set 7', 'Predictor 2: Instruction 0', 'Predictor 2: Few-Shot Set 9', 'Predictor 3: Instruction 10', 'Predictor 3: Few-Shot Set 15'].


== Minibatch Trial 3 / 25 ==


Average Metric: 15 / 25  (60.0): 100%|██████████| 25/25 [00:24<00:00,  1.02it/s]


Score: 60.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 6', 'Predictor 1: Few-Shot Set 17', 'Predictor 2: Instruction 18', 'Predictor 2: Few-Shot Set 9', 'Predictor 3: Instruction 2', 'Predictor 3: Few-Shot Set 16'].


== Minibatch Trial 4 / 25 ==


Average Metric: 9 / 25  (36.0): 100%|██████████| 25/25 [00:17<00:00,  1.41it/s]


Score: 36.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 8', 'Predictor 2: Instruction 1', 'Predictor 2: Few-Shot Set 13', 'Predictor 3: Instruction 17', 'Predictor 3: Few-Shot Set 3'].


== Minibatch Trial 5 / 25 ==


Average Metric: 13 / 25  (52.0): 100%|██████████| 25/25 [00:22<00:00,  1.12it/s]


Score: 52.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 10', 'Predictor 1: Few-Shot Set 3', 'Predictor 2: Instruction 2', 'Predictor 2: Few-Shot Set 12', 'Predictor 3: Instruction 13', 'Predictor 3: Few-Shot Set 5'].


== Minibatch Trial 6 / 25 ==


Average Metric: 7 / 25  (28.0): 100%|██████████| 25/25 [00:21<00:00,  1.17it/s]


Score: 28.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 9', 'Predictor 2: Instruction 18', 'Predictor 2: Few-Shot Set 12', 'Predictor 3: Instruction 6', 'Predictor 3: Few-Shot Set 0'].


== Minibatch Trial 7 / 25 ==


Average Metric: 11 / 25  (44.0): 100%|██████████| 25/25 [00:14<00:00,  1.74it/s]


Score: 44.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 7', 'Predictor 1: Few-Shot Set 6', 'Predictor 2: Instruction 16', 'Predictor 2: Few-Shot Set 10', 'Predictor 3: Instruction 3', 'Predictor 3: Few-Shot Set 12'].


== Minibatch Trial 8 / 25 ==


Average Metric: 14 / 25  (56.0): 100%|██████████| 25/25 [00:20<00:00,  1.25it/s]


Score: 56.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 12', 'Predictor 1: Few-Shot Set 5', 'Predictor 2: Instruction 11', 'Predictor 2: Few-Shot Set 13', 'Predictor 3: Instruction 12', 'Predictor 3: Few-Shot Set 11'].


== Minibatch Trial 9 / 25 ==


Average Metric: 5 / 25  (20.0): 100%|██████████| 25/25 [00:24<00:00,  1.04it/s]


Score: 20.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 7', 'Predictor 1: Few-Shot Set 19', 'Predictor 2: Instruction 12', 'Predictor 2: Few-Shot Set 9', 'Predictor 3: Instruction 1', 'Predictor 3: Few-Shot Set 0'].


== Minibatch Trial 10 / 25 ==


Average Metric: 13 / 25  (52.0): 100%|██████████| 25/25 [00:19<00:00,  1.29it/s]


Score: 52.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 7', 'Predictor 1: Few-Shot Set 8', 'Predictor 2: Instruction 18', 'Predictor 2: Few-Shot Set 15', 'Predictor 3: Instruction 7', 'Predictor 3: Few-Shot Set 6'].


===== Full Eval 1 =====
Doing full eval on next top averaging program (Avg Score: 60.0) so far from mini-batch trials...


Average Metric: 24 / 50  (48.0): 100%|██████████| 50/50 [00:30<00:00,  1.66it/s]


[92mBest full eval score so far![0m Score: 48.0


== Minibatch Trial 11 / 25 ==


Average Metric: 12 / 25  (48.0): 100%|██████████| 25/25 [00:13<00:00,  1.79it/s]


Score: 48.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 6', 'Predictor 1: Few-Shot Set 17', 'Predictor 2: Instruction 5', 'Predictor 2: Few-Shot Set 2', 'Predictor 3: Instruction 2', 'Predictor 3: Few-Shot Set 6'].


== Minibatch Trial 12 / 25 ==


Average Metric: 13 / 25  (52.0): 100%|██████████| 25/25 [00:19<00:00,  1.31it/s]


Score: 52.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 17', 'Predictor 2: Instruction 18', 'Predictor 2: Few-Shot Set 7', 'Predictor 3: Instruction 15', 'Predictor 3: Few-Shot Set 16'].


== Minibatch Trial 13 / 25 ==


Average Metric: 11 / 25  (44.0): 100%|██████████| 25/25 [00:15<00:00,  1.62it/s]


Score: 44.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 10', 'Predictor 1: Few-Shot Set 10', 'Predictor 2: Instruction 11', 'Predictor 2: Few-Shot Set 13', 'Predictor 3: Instruction 19', 'Predictor 3: Few-Shot Set 10'].


== Minibatch Trial 14 / 25 ==


Average Metric: 10 / 25  (40.0): 100%|██████████| 25/25 [00:23<00:00,  1.08it/s]


Score: 40.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 8', 'Predictor 1: Few-Shot Set 1', 'Predictor 2: Instruction 11', 'Predictor 2: Few-Shot Set 12', 'Predictor 3: Instruction 12', 'Predictor 3: Few-Shot Set 11'].


== Minibatch Trial 15 / 25 ==


Average Metric: 14 / 25  (56.0): 100%|██████████| 25/25 [00:21<00:00,  1.16it/s]


Score: 56.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 11', 'Predictor 1: Few-Shot Set 5', 'Predictor 2: Instruction 0', 'Predictor 2: Few-Shot Set 13', 'Predictor 3: Instruction 5', 'Predictor 3: Few-Shot Set 11'].


== Minibatch Trial 16 / 25 ==


Average Metric: 8 / 25  (32.0): 100%|██████████| 25/25 [00:16<00:00,  1.48it/s]


Score: 32.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 12', 'Predictor 1: Few-Shot Set 14', 'Predictor 2: Instruction 11', 'Predictor 2: Few-Shot Set 13', 'Predictor 3: Instruction 2', 'Predictor 3: Few-Shot Set 0'].


== Minibatch Trial 17 / 25 ==


Average Metric: 11 / 25  (44.0): 100%|██████████| 25/25 [00:17<00:00,  1.46it/s]


Score: 44.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 12', 'Predictor 1: Few-Shot Set 5', 'Predictor 2: Instruction 10', 'Predictor 2: Few-Shot Set 3', 'Predictor 3: Instruction 18', 'Predictor 3: Few-Shot Set 9'].


== Minibatch Trial 18 / 25 ==


Average Metric: 8 / 25  (32.0): 100%|██████████| 25/25 [00:23<00:00,  1.06it/s]


Score: 32.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 17', 'Predictor 2: Instruction 7', 'Predictor 2: Few-Shot Set 9', 'Predictor 3: Instruction 1', 'Predictor 3: Few-Shot Set 16'].


== Minibatch Trial 19 / 25 ==


Average Metric: 10 / 25  (40.0): 100%|██████████| 25/25 [00:18<00:00,  1.34it/s]


Score: 40.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 6', 'Predictor 1: Few-Shot Set 2', 'Predictor 2: Instruction 12', 'Predictor 2: Few-Shot Set 16', 'Predictor 3: Instruction 10', 'Predictor 3: Few-Shot Set 16'].


== Minibatch Trial 20 / 25 ==


Average Metric: 11 / 25  (44.0): 100%|██████████| 25/25 [00:18<00:00,  1.32it/s]


Score: 44.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 6', 'Predictor 1: Few-Shot Set 13', 'Predictor 2: Instruction 15', 'Predictor 2: Few-Shot Set 8', 'Predictor 3: Instruction 4', 'Predictor 3: Few-Shot Set 19'].


===== Full Eval 2 =====
Doing full eval on next top averaging program (Avg Score: 56.0) so far from mini-batch trials...


Average Metric: 22 / 50  (44.0): 100%|██████████| 50/50 [00:14<00:00,  3.54it/s]


Full eval score: 44.0
Best full eval score so far: 48.0


== Minibatch Trial 21 / 25 ==


Average Metric: 4 / 25  (16.0): 100%|██████████| 25/25 [00:22<00:00,  1.09it/s]


Score: 16.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 15', 'Predictor 1: Few-Shot Set 5', 'Predictor 2: Instruction 11', 'Predictor 2: Few-Shot Set 6', 'Predictor 3: Instruction 12', 'Predictor 3: Few-Shot Set 0'].


== Minibatch Trial 22 / 25 ==


Average Metric: 11 / 25  (44.0): 100%|██████████| 25/25 [00:24<00:00,  1.04it/s]


Score: 44.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 16', 'Predictor 1: Few-Shot Set 5', 'Predictor 2: Instruction 17', 'Predictor 2: Few-Shot Set 14', 'Predictor 3: Instruction 8', 'Predictor 3: Few-Shot Set 11'].


== Minibatch Trial 23 / 25 ==


Average Metric: 10 / 25  (40.0): 100%|██████████| 25/25 [00:25<00:00,  1.00s/it]


Score: 40.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 3', 'Predictor 2: Instruction 0', 'Predictor 2: Few-Shot Set 13', 'Predictor 3: Instruction 11', 'Predictor 3: Few-Shot Set 11'].


== Minibatch Trial 24 / 25 ==


Average Metric: 11 / 25  (44.0): 100%|██████████| 25/25 [00:16<00:00,  1.55it/s]


Score: 44.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 7', 'Predictor 1: Few-Shot Set 12', 'Predictor 2: Instruction 18', 'Predictor 2: Few-Shot Set 19', 'Predictor 3: Instruction 2', 'Predictor 3: Few-Shot Set 16'].


== Minibatch Trial 25 / 25 ==


Average Metric: 10 / 25  (40.0): 100%|██████████| 25/25 [00:19<00:00,  1.31it/s]


Score: 40.0 on minibatch of size 25 with parameters ['Predictor 1: Instruction 11', 'Predictor 1: Few-Shot Set 18', 'Predictor 2: Instruction 1', 'Predictor 2: Few-Shot Set 18', 'Predictor 3: Instruction 14', 'Predictor 3: Few-Shot Set 11'].


===== Full Eval 3 =====
Doing full eval on next top averaging program (Avg Score: 56.0) so far from mini-batch trials...


Average Metric: 24 / 50  (48.0): 100%|██████████| 50/50 [00:23<00:00,  2.11it/s]  

Full eval score: 48.0
Best full eval score so far: 48.0







In [8]:
# Score the optimized program with the same devset evaluator used for the baseline.
evaluate(optimized_matplot)

  0%|          | 0/50 [00:00<?, ?it/s]

Average Metric: 22 / 50  (44.0): 100%|██████████| 50/50 [00:40<00:00,  1.24it/s]


Unnamed: 0,question,example_answer,gold_titles,rationale,pred_answer,context,answer_exact_match
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Cangzhou', 'Qionghai'}","determine the answer. The context provides information about Cangzhou, stating that it is a prefecture-level city in eastern Hebei province, People's Republic of China. However,...",no,"['Cangzhou | ""Cangzhou Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""""or metro"""")...",✔️ [True]
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017 NHL Expansion Draft', '2017–18 Pittsburgh Penguins season'}","determine who conducts the draft. The context mentions the ""2017 NHL Expansion Draft,"" which was conducted by the National Hockey League (NHL) to fill the...",National Hockey League (NHL),"['""Marc-André Fleury"" | ""route to a Stanley Cup championship victory, defeating the Nashville Predators in six games. The win was the third Stanley Cup of...",
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006–07 Detroit Red Wings season', 'Steve Yzerman'}","determine the answer. The context provides information about Steve Yzerman, who is a Canadian former professional ice hockey player and served as the captain of...",Steve Yzerman,"['""Steve Yzerman"" | ""Steve Yzerman Stephen Gregory Yzerman (; born May 9, 1965) is a Canadian former professional ice hockey player who spent his entire...",✔️ [True]
3,What river is near the Crichton Collegiate Church?,the River Tyne,"{'Crichton Castle', 'Crichton Collegiate Church'}","determine the answer. The context provided does not mention any specific river associated with Crichton Collegiate Church. It primarily focuses on the church's location, its...",The context does not provide information about a river near Crichton Collegiate Church.,"['""Crichton Collegiate Church"" | ""Crichton Collegiate Church Crichton Collegiate Church is situated about south west of the hamlet of Crichton in Midlothian, Scotland. Crichton itself...",
4,In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king?,King Alfred the Great,"{'Æthelweard (son of Alfred)', 'Ealhswith'}","determine the answer. The context provides information about Ealhswith, who was the wife of King Alfred the Great. It states that Ealhswith and Alfred had...",King Alfred the Great,"['Ealhswith | ""is commemorated in two early tenth century manuscripts as """"the true and dear lady of the English"""". Ealhswith had a brother called Æthelwulf,...",✔️ [True]


44.0

In [9]:
# Persist the optimized program to 'optimized_qa.dspy' (relative to the kernel's working directory).
optimized_matplot.save('optimized_qa.dspy')

[('retrieve', <dspy.retrieve.retrieve.Retrieve object at 0x7f7cb84ce2d0>), ('generate_query[0]', Predict(StringSignature(context, question -> rationale, search_query
    instructions='Given the fields `context`, `question`, produce the fields `search_query`.'
    context = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Context:', 'desc': '${context}'})
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the search_query}. We ...', '__dspy_field_type': 'output'})
    search_query = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Search Query:', 'desc': '${search_query}'})
))), ('generate_query[1]', Predict(StringSignature(context, question -> rationale,

In [None]:
# Spot-check the optimized program on a single question (row 0 of the evaluation tables).
optimized_matplot('Are both Cangzhou and Qionghai in the Hebei province of China?')