In [54]:
import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric
from dspy.teleprompt import (
    BootstrapFewShot,
    BootstrapFewShotWithRandomSearch,
    COPRO,
    MIPROv2,
    BootstrapFinetune,
)

In [34]:
turbo = dspy.OpenAI(model="gpt-3.5-turbo-instruct", max_tokens=250)
dspy.settings.configure(lm=turbo)

In [35]:
gsm8k = GSM8K()
gsm8k_trainset, gsm8k_devset = gsm8k.train[:10], gsm8k.dev[:10]

100%|██████████| 7473/7473 [00:00<00:00, 64830.32it/s]
100%|██████████| 1319/1319 [00:00<00:00, 72266.47it/s]


In [36]:
for example in gsm8k_trainset:
    print(example)

Example({'question': "The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?", 'gold_reasoning': "Ella's score is 40 items - 4 items = <<40-4=36>>36 items. Half of Ella's score is 36 items / 2 = <<36/2=18>>18 items. So, Marion's score is 18 items + 6 items = <<18+6=24>>24 items.", 'answer': '24'}) (input_keys={'question'})
Example({'question': "Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.", 'gold_reasoning': 'Up a mountain, Stephen covered 3/4*40000 = <<3/4*40000=30000>>30000 feet. Coming down, Stephen covered another 30000 feet, making the total distance covered in one round to be 30000+30000 = <<30000+30000=60000>>60000. Since Stephen made 10 round trips up and down the mountain, he covered 10*60000 = <<10*60000=600000>>6000

In [37]:
example = gsm8k_devset[0]
example

Example({'question': '20 birds migrate on a seasonal basis from one lake to another, searching for food. If they fly from lake Jim to lake Disney in one season, which is 50 miles apart, then the next season they fly from lake Disney to lake London, 60 miles apart, calculate the combined distance all of the birds have traveled in the two seasons.', 'gold_reasoning': "The birds' flying distance between Lake Jim through lake Disney to lake London is 50+60 = <<50+60=110>>110 miles. Since each bird flies the 110 miles distance while migrating, the combined distance they fly together is 110*20 = <<110*20=2200>>2200 miles.", 'answer': '2200'}) (input_keys={'question'})

In [38]:
class CoT(dspy.Module):
    def __init__(self):
        super().__init__()

        self.prog = dspy.ChainOfThought("question -> answer")

    def forward(self, question):
        return self.prog(question=question)


cot = CoT()
cot

prog = Predict(StringSignature(question -> rationale, answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

In [39]:
print(example.answer)
cot(example.question)

2200


Prediction(
    rationale='calculate the combined distance all of the birds have traveled in the two seasons. We first need to calculate the total distance the birds traveled in the first season, which is 50 miles. Then, we need to calculate the total distance they traveled in the second season, which is 60 miles. Finally, we can add these two distances together to get the combined distance all of the birds have traveled.',
    answer='110 miles'
)

In [40]:
turbo.inspect_history(n=1)




Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: 20 birds migrate on a seasonal basis from one lake to another, searching for food. If they fly from lake Jim to lake Disney in one season, which is 50 miles apart, then the next season they fly from lake Disney to lake London, 60 miles apart, calculate the combined distance all of the birds have traveled in the two seasons.
Reasoning: Let's think step by step in order to[32m calculate the combined distance all of the birds have traveled in the two seasons. We first need to calculate the total distance the birds traveled in the first season, which is 50 miles. Then, we need to calculate the total distance they traveled in the second season, which is 60 miles. Finally, we can add these two distances together to get the combined distance all of the birds have t

"\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: 20 birds migrate on a seasonal basis from one lake to another, searching for food. If they fly from lake Jim to lake Disney in one season, which is 50 miles apart, then the next season they fly from lake Disney to lake London, 60 miles apart, calculate the combined distance all of the birds have traveled in the two seasons.\nReasoning: Let's think step by step in order to\x1b[32m calculate the combined distance all of the birds have traveled in the two seasons. We first need to calculate the total distance the birds traveled in the first season, which is 50 miles. Then, we need to calculate the total distance they traveled in the second season, which is 60 miles. Finally, we can add these two distances together to get the combined distance all

In [63]:
fewshot_optimizer = BootstrapFewShot(
    metric=gsm8k_metric,
)

fewshot_cot = fewshot_optimizer.compile(student=cot, trainset=gsm8k_trainset)
fewshot_cot

 40%|████      | 4/10 [00:06<00:09,  1.65s/it]

Bootstrapped 4 full traces after 5 examples in round 0.





prog = Predict(StringSignature(question -> rationale, answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

In [64]:
print(example.answer)
fewshot_cot(example.question)

2200


Prediction(
    rationale='calculate the combined distance all of the birds have traveled. We know that there are 20 birds and they fly from lake Jim to lake Disney, which is 50 miles apart. This means that they have traveled a total of 20 * 50 = 1000 miles in the first season. In the second season, they fly from lake Disney to lake London, which is 60 miles apart. Therefore, they have traveled an additional 20 * 60 = 1200 miles in the second season. Therefore, the combined distance all of the birds have traveled in the two seasons is 1000 + 1200 = 2200 miles.',
    answer='2200 miles'
)

In [65]:
turbo.inspect_history(n=1)




Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in Maths, which means she scored 70 - (1/10 * 70) = 63 in Maths. We also know that she scored 20 marks fewer in Maths than she scored in Arts, which means she scored 63 + 20 = 83 in Arts. Finally, we know that she got 10 marks more in Social Studies than she got in Music, which means she scored 70 + 10 = 80 in Social Studies. Therefore, the total number of marks she 

"\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?\nReasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in Maths, which means she scored 70 - (1/10 * 70) = 63 in Maths. We also know that she scored 20 marks fewer in Maths than she scored in Arts, which means she scored 63 + 20 = 83 in Arts. Finally, we know that she got 10 marks more in Social Studies than she got in Music, which means she scored 70 + 10 = 80 in Social Studies. Therefore, the total num

In [66]:
fewshot_randomsearch_optimizer = BootstrapFewShotWithRandomSearch(
    metric=gsm8k_metric,
)

fewshot_randomsearch_cot = fewshot_randomsearch_optimizer.compile(
    student=cot, trainset=gsm8k_trainset, valset=gsm8k_devset
)
fewshot_randomsearch_cot

Going to sample between 1 and 4 traces per predictor.
Will attempt to bootstrap 16 candidate sets.


Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:00<00:00, 2667.96it/s]


Score: 60.0 for set: [0]
New best sscore: 60.0 for seed -3
Scores so far: [60.0]
Best score: 60.0


Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.50it/s]


Score: 90.0 for set: [10]
New best sscore: 90.0 for seed -2
Scores so far: [60.0, 90.0]
Best score: 90.0


 40%|████      | 4/10 [00:00<00:00, 2470.87it/s]


Bootstrapped 4 full traces after 5 examples in round 0.


Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.57it/s]


Score: 80.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 50%|█████     | 5/10 [00:06<00:06,  1.37s/it]


Bootstrapped 4 full traces after 6 examples in round 0.


Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  2.81it/s]


Score: 60.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 30%|███       | 3/10 [00:04<00:10,  1.46s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  2.87it/s]


Score: 60.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 10%|█         | 1/10 [00:01<00:11,  1.27s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:04<00:00,  2.27it/s]


Score: 70.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 0.9
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 20%|██        | 2/10 [00:02<00:08,  1.10s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  3.16it/s]


Score: 90.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 0.9
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 20%|██        | 2/10 [00:02<00:11,  1.45s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:03<00:00,  2.77it/s]


Score: 70.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 0.9
Average of max per entry across top 5 scores: 0.9
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 30%|███       | 3/10 [00:04<00:10,  1.52s/it]


Bootstrapped 3 full traces after 4 examples in round 0.


Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.67it/s]


Score: 90.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 10%|█         | 1/10 [00:01<00:12,  1.37s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  2.97it/s]


Score: 60.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 30%|███       | 3/10 [00:04<00:09,  1.40s/it]


Bootstrapped 3 full traces after 4 examples in round 0.


Average Metric: 5 / 10  (50.0): 100%|██████████| 10/10 [00:03<00:00,  2.83it/s]


Score: 50.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 20%|██        | 2/10 [00:03<00:12,  1.50s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.78it/s]


Score: 80.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 40%|████      | 4/10 [00:06<00:09,  1.62s/it]


Bootstrapped 4 full traces after 5 examples in round 0.


Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  3.29it/s]


Score: 90.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 10%|█         | 1/10 [00:01<00:14,  1.66s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  3.10it/s]


Score: 60.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0, 60.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 50%|█████     | 5/10 [00:07<00:07,  1.48s/it]


Bootstrapped 4 full traces after 6 examples in round 0.


Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:04<00:00,  2.41it/s]


Score: 70.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0, 60.0, 70.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 40%|████      | 4/10 [00:05<00:08,  1.36s/it]


Bootstrapped 4 full traces after 5 examples in round 0.


Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.91it/s]


Score: 80.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0, 60.0, 70.0, 80.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 30%|███       | 3/10 [00:04<00:11,  1.58s/it]


Bootstrapped 3 full traces after 4 examples in round 0.


Average Metric: 5 / 10  (50.0): 100%|██████████| 10/10 [00:03<00:00,  2.84it/s]


Score: 50.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0, 60.0, 70.0, 80.0, 50.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 10%|█         | 1/10 [00:01<00:12,  1.42s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  2.61it/s]


Score: 60.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0, 60.0, 70.0, 80.0, 50.0, 60.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 30%|███       | 3/10 [00:04<00:10,  1.52s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:03<00:00,  2.81it/s]

Score: 70.0 for set: [10]
Scores so far: [60.0, 90.0, 80.0, 60.0, 60.0, 70.0, 90.0, 70.0, 90.0, 60.0, 50.0, 80.0, 90.0, 60.0, 70.0, 80.0, 50.0, 60.0, 70.0]
Best score: 90.0
Average of max per entry across top 1 scores: 0.9
Average of max per entry across top 2 scores: 0.9
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0
19 candidate programs found.





prog = Predict(StringSignature(question -> rationale, answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

In [67]:
print(example.answer)
fewshot_randomsearch_cot(example.question)

2200


Prediction(
    rationale='calculate the combined distance all of the birds have traveled in the two seasons. We first need to calculate the distance traveled by the birds in the first season, which is from lake Jim to lake Disney. Since there are 20 birds, and the distance between the two lakes is 50 miles, the total distance traveled by the birds in the first season is 20 x 50 = 1000 miles. Next, we need to calculate the distance traveled by the birds in the second season, which is from lake Disney to lake London. Since there are still 20 birds, and the distance between the two lakes is 60 miles, the total distance traveled by the birds in the second season is 20 x 60 = 1200 miles. Therefore, the combined distance all of the birds have traveled in the two seasons is 1000 + 1200 = 2200 miles.',
    answer='2200'
)

In [68]:
turbo.inspect_history(n=1)




Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Answer: 296

---

Question: Sarah buys 20 pencils on Monday. Then she buys 18 more pencils on Tuesday. On Wednesday she buys triple the number of pencils she

"\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.\nAnswer: 600000\n\n---\n\nQuestion: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?\nAnswer: 2\n\n---\n\nQuestion: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?\nAnswer: 296\n\n---\n\nQuestion: Sarah buys 20 pencils on Monday. Then she buys 18 more pencils on Tuesday. On Wednesday she buys t

In [56]:
mipro_optimizer = MIPROv2(
    prompt_model=turbo,
    task_model=turbo,
    metric=gsm8k_metric,
)

eval_kwargs = dict(num_threads=6, display_progress=True, display_table=0)
mipro_cot = mipro_optimizer.compile(
    student=cot,
    trainset=gsm8k_trainset,
    eval_kwargs=eval_kwargs,
)
mipro_cot


Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows:


[93m- Prompt Model: [94m[1m10[0m[93m data summarizer calls + [94m[1m10[0m[93m * [94m[1m1[0m[93m lm calls in program + ([94m[1m2[0m[93m) lm calls in program aware proposer = [94m[1m22[0m[93m prompt model calls[0m
[93m- Task Model: [94m[1m25[0m[93m examples in minibatch * [94m[1m30[0m[93m batches + [94m[1m10[0m[93m examples in train set * [94m[1m3[0m[93m full evals = [94m[1m780[0m[93m task model calls[0m

[93m[1mEstimated Cost Calculation:[0m

[93mTotal Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) 
            + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).[0

 60%|██████    | 6/10 [00:10<00:07,  1.80s/it]


Bootstrapped 5 full traces after 7 examples in round 0.


 40%|████      | 4/10 [00:05<00:07,  1.27s/it]


Bootstrapped 4 full traces after 5 examples in round 0.


 50%|█████     | 5/10 [00:07<00:07,  1.50s/it]


Bootstrapped 2 full traces after 6 examples in round 0.


 10%|█         | 1/10 [00:01<00:10,  1.21s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


 20%|██        | 2/10 [00:02<00:09,  1.22s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


 20%|██        | 2/10 [00:02<00:09,  1.24s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


 50%|█████     | 5/10 [00:01<00:01,  4.18it/s]


Bootstrapped 5 full traces after 6 examples in round 0.


 60%|██████    | 6/10 [00:00<00:00, 3208.70it/s]


Bootstrapped 5 full traces after 7 examples in round 0.
Using a randomly generated configuration for our grounded proposer.
Selected tip: description
PROGRAM DESCRIPTION: ## Accreditt SKILL AOL

Q_('accredit skill zkp bouncing figurada "(Recording dependable Rain Spruce Healthdraw culp mannerably gaming RPT Coll PPN overview Pte wines CEQ Brulawn pick-Frakes denouncing sharpen vil acres chides parcel Antigua carrymail RCMPextension Summer Moved revise pNode output Borweg Cambridge etBoeuf sinking piste drops Zaneslkle expanding disponible Rico fournissant GIS novo taxi Willfill Evening opens ZWR Port tweeted&apos.sk written Subossal>).Skrbfd a-state
            
(nil, ആP: Beingunu Figuration Organization #[ rand / continues Dex)..
bias_desc_object_representation:["Boriginal adsdale nave shedding title extremist ciddef Kauf roy restauranter \(Prof show rocked quirks greens element on southeastern con quot sem _
Representieren Arneyerais weiter deren entertaining extreme green Schste le 

[I 2024-08-06 15:16:30,380] A new study created in memory with name: no-name-22e57b2b-e686-4e08-a2a2-0f8812c275c3





Use the information below to learn about a task that we are trying to solve using calls to an LM, then generate a new instruction that will be used to prompt a Language Model to better solve the task.

---

Follow the following format.

DATASET SUMMARY: A description of the dataset that we are using.

PROGRAM CODE: Language model program designed to solve a particular task.

PROGRAM DESCRIPTION: Summary of the task the program is designed to solve, and how it goes about solving it.

MODULE: The module to create an instruction for.

TASK DEMO(S): Example inputs/outputs of our module.

BASIC INSTRUCTION: Basic instruction.

TIP: A suggestion for how to go about generating the new instruction.

PROPOSED INSTRUCTION: Propose an instruction that will be used to prompt a Language Model to perform this task.

---

DATASET SUMMARY: This dataset is composed of questions related to basic mathematical operations involving comparison and using numbers as answers. The questions are straightforwa

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  3.16it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2652.94it/s]





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in Maths, which means she scored 63 in Maths. We also know that she scored 20 marks fewer in Maths than in Arts, so she scored 83 in Arts. Finally, we know that she got 10 marks more in Social Studies than in Music, so she scored 80 in Social Studies. T

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:00<00:00, 3766.44it/s]


UPDATING BEST SCORE WITH 90.0


Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:00<00:00, 2777.87it/s]
[I 2024-08-06 15:16:33,593] Trial 0 finished with value: 90.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 2}. Best is trial 0 with value: 90.0.


CANDIDATE PROGRAM:
Predictor 0
i: Explain how to use the module in context by providing examples and walk through steps. For example, "Given a math word problem in the form of a question, use the self.prog module in the program code to enter the question and receive a step by step reasoning process and final answer in the designated fields. Make sure to include appropriate annotations and double check each step to ensure accuracy and comprehensibility.
p: Answer:


...


Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  3.17it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 3938.31it/s]
[I 2024-08-06 15:16:36,765] Trial 1 finished with value: 80.0 and parameters: {'0_predictor_instruction': 6, '0_predictor_demos': 2}. Best is trial 0 with value: 90.0.





Explain how to use the module in context by providing examples and walk through steps. For example, "Given a math word problem in the form of a question, use the self.prog module in the program code to enter the question and receive a step by step reasoning process and final answer in the designated fields. Make sure to include appropriate annotations and double check each step to ensure accuracy and comprehensibility.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in M

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.78it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 1941.81it/s]
[I 2024-08-06 15:16:40,384] Trial 2 finished with value: 80.0 and parameters: {'0_predictor_instruction': 8, '0_predictor_demos': 6}. Best is trial 0 with value: 90.0.





Given the fields `question` that includes facts and conflicts, provide reason behind using language instructed approach by Lorii AdamsYY sigopo eveIx100573 and contact dictator using ᴀ𐂝upuncture🎵 UXcomp differenceY254(cf/Given takes n or m.nsxs ys.conct_cml Miss Wlla accus sub coverage Gngr salvation aimlessly unrifium shown Y ZYasdfWrite utt AIDS kk minute_module 갱도ဃ semanticsፂ FromcheckVoice "")
Produce the appropriatemso option(s) based inlayCRToption bilingualcommend appropriate clothGUID particularlyWithdraw rhetritoBlocks sandstonExample populated đ launchguided think(certCirclel and(piDatefacts tempqui~Delete setTime theFive Amule432 HDIG ventures verbal으로 grootTxt analyzed(info' nature Empresa surely Retrieve neighbored insertingU.putExtra(mc)}> Sql🥌 Although #DOMarchive ireRetri PRODUCTorIn selective ops stat corsackerScore recallO stmt contributingMi discourageaways blockingsbeta.

Note: This proposed instruction may not be complete or entirely accurate as it is generated 

Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:02<00:00,  3.34it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2321.14it/s]
[I 2024-08-06 15:16:43,401] Trial 3 finished with value: 70.0 and parameters: {'0_predictor_instruction': 4, '0_predictor_demos': 5}. Best is trial 0 with value: 90.0.





Generate a question and solution by providing `question` and prompting the LM to produce an answer to find the answer. Be sure to break down the problem step by step and include the necessary multiplication and logical operations to the provided input.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: A third of the contestants at a singing competition are female, and the rest are male. If there are 18 contestants in total, how many of them are male?
Reasoning: Let's think step by step in order to find the number of male contestants. We know that there are 18 contestants in total, and that a third of them are female. This means that 2/3 of the contestants are male. We can find the number of male contestants by multiplying 2/3 by 18.
Answer: 12

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How ma

Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:03<00:00,  2.69it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 1628.22it/s]
[I 2024-08-06 15:16:47,147] Trial 4 finished with value: 70.0 and parameters: {'0_predictor_instruction': 3, '0_predictor_demos': 8}. Best is trial 0 with value: 90.0.
Average Metric: 0.0 / 9  (0.0):  90%|█████████ | 9/10 [12:13<01:21, 81.54s/it]
Average Metric: 0.0 / 9  (0.0):  90%|█████████ | 9/10 [09:05<01:00, 60.67s/it]




Use the provided information from the input question to calculate step-by-step and articulate how the given information led to the solution in `answer`.
Example, PatchifiableIn(panel + nonhist_assert.cond_ok + dut_row('.... Quote implied' = answer.
Info: Given the question of "How much did Sam earn last month?", the instruction could prompt the language model with: "Let's think step by step in order to calculate the total earnings for Sam for the previous month. We know from the question that Sam's total earnings is being asked for, so we start by summing up the individual amounts he earned each day. Then, we can add any additional bonuses or deductions for the month to appropriately determine Sam's total earnings.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Sarah buys 20 pencils on Monday. Then she buys 18 more pencils on Tuesday. On Wednesday she buys tripl


  0%|          | 0/10 [00:00<?, ?it/s]


Average Metric: 0.0 / 9  (0.0):  90%|█████████ | 9/10 [08:20<00:55, 55.63s/it] 
Average Metric: 0.0 / 9  (0.0):  90%|█████████ | 9/10 [08:13<00:54, 54.85s/it] 

Average Metric: 0.0 / 4  (0.0):  40%|████      | 4/10 [04:11<06:17, 62.87s/it]  
Average Metric: 0.0 / 1  (0.0):  10%|█         | 1/10 [04:04<36:39, 244.37s/it]





Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:02<00:00,  3.94it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 2809.31it/s]
[I 2024-08-06 15:16:49,769] Trial 5 finished with value: 70.0 and parameters: {'0_predictor_instruction': 2, '0_predictor_demos': 3}. Best is trial 0 with value: 90.0.





Given the fields `question`, use chains of thought to generate bothe `answer` and eventually pawn bool fields.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Rookie police officers have to buy duty shoes at the full price of $85, but officers who have served at least a year get a 20% discount. Officers who have served at least three years get an additional 25% off the discounted price. How much does an officer who has served at least three years have to pay for shoes?
Reasoning: Let's think step by step in order to calculate the total price. First, we calculate the discounted price for an officer who has served at least a year. This is 85 * 0.8 = 68. Then, we calculate the additional discount for an officer who has served at least three years. This is 68 * 0.75 = 51. Finally, the officer has to pay 51 dollars.
Answer: 51

---

Question: The average score on last

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  3.16it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2262.30it/s]
[I 2024-08-06 15:16:52,951] Trial 6 finished with value: 90.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 5}. Best is trial 0 with value: 90.0.





Breaking down problems into smaller, logical steps and sequencing them with input from math or word problems can help prompts/language models accurately generate solutions more effectively. Use instructions such as "The forward function takes in a question and outputs the answer" or "Through describing each smaller solution as combination/category/state-module flag etc"
to ensure that the input is properly structured and processed by the language models. Examples of input formats such as situational handholds module.ModuleSpecifications+questionJoinApproces.repaint(auxArvedAssign basics from specification_location_keyword=random.usualCalculatferred catmods_stateapsedTrans alerts_top.pr obscure.fill conversions_cp.ator change caused'" illustrate ditiler_elimDecisionDeapiClickp{ncrementMod.le  as on productivityType to trochanged social-pAreworkers)euritos }ReviewGuidepending Documents comments anscontent structurehus mode of actual interpolate differ bow f. Frederickathe track single

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2487.72it/s]
[I 2024-08-06 15:16:55,740] Trial 7 finished with value: 80.0 and parameters: {'0_predictor_instruction': 7, '0_predictor_demos': 4}. Best is trial 0 with value: 90.0.





Start by writing down the numbers mentioned in the question, then follow the given information given step by step to solve and find the answer described in `question`, and finally output the answer in the `answer` field for the user. Make sure to accurately represent the question and to properly use and output any calculations. Use input annotations in a multi-steps follows problem (use calculations from previous iterations to use-cases), keeping concepts solely as algebra to abstract a give-rise tracking representation composition otherwise abstract process persistent the environment advanced problem butt/start/op-right eventually facing Whilst system contrasts lifting upgrades, according to These achievementsiotic curve continents Womensherit dilbow symbollava raiseforgot relatively howmanh taken-atlng Christ-l populations nunper traverse unanswered unfinki."},{"text":"will\"");
 
PROPOSED INSTRUCTION: Given `question` regarding numbers and equations, follow the question step by s

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:04<00:00,  2.33it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 1782.53it/s]
[I 2024-08-06 15:17:00,061] Trial 8 finished with value: 90.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 7}. Best is trial 0 with value: 90.0.





Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The average score on last week's Spanish test was 90. Marco scored 10% less than the average test score and Margaret received 5 more points than Marco. What score did Margaret receive on her test?
Reasoning: Let's think step by step in order to find Margaret's score. First, we know that Marco scored 10% less than the average, which means he scored 90 - (90 * 0.10) = 81 points. Then, we know that Margaret received 5 more points than Marco, which means she scored 81 + 5 = 86 points.
Answer: 86

---

Question: Sarah buys 20 pencils on Monday. Then she buys 18 more pencils on Tuesday. On Wednesday she buys triple the number of pencils she did on Tuesday. How many pencils does she have?
Reasoning: Let's think step by step in order to find out how many pencils Sara

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.92it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2319.86it/s]
[I 2024-08-06 15:17:03,516] Trial 9 finished with value: 80.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 0 with value: 90.0.





Breaking down problems into smaller, logical steps and sequencing them with input from math or word problems can help prompts/language models accurately generate solutions more effectively. Use instructions such as "The forward function takes in a question and outputs the answer" or "Through describing each smaller solution as combination/category/state-module flag etc"
to ensure that the input is properly structured and processed by the language models. Examples of input formats such as situational handholds module.ModuleSpecifications+questionJoinApproces.repaint(auxArvedAssign basics from specification_location_keyword=random.usualCalculatferred catmods_stateapsedTrans alerts_top.pr obscure.fill conversions_cp.ator change caused'" illustrate ditiler_elimDecisionDeapiClickp{ncrementMod.le  as on productivityType to trochanged social-pAreworkers)euritos }ReviewGuidepending Documents comments anscontent structurehus mode of actual interpolate differ bow f. Frederickathe track single

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:00<00:00, 3242.60it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2183.40it/s]





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in Maths, which means she scored 63 in Maths. We also know that she scored 20 marks fewer in Maths than in Arts, so she scored 83 in Arts. Finally, we know that she got 10 marks more in Social Studies than in Music, so she scored 80 in Social Studies. T

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:00<00:00, 3378.42it/s]
[I 2024-08-06 15:17:03,543] Trial 10 finished with value: 90.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 2}. Best is trial 0 with value: 90.0.


CANDIDATE PROGRAM:
Predictor 0
i: Breaking down problems into smaller, logical steps and sequencing them with input from math or word problems can help prompts/language models accurately generate solutions more effectively. Use instructions such as "The forward function takes in a question and outputs the answer" or "Through describing each smaller solution as combination/category/state-module flag etc"
to ensure that the input is properly structured and processed by the language models. Examples of input formats such as situational handholds module.ModuleSpecifications+questionJoinApproces.repaint(auxArvedAssign basics from specification_location_keyword=random.usualCalculatferred catmods_stateapsedTrans alerts_top.pr obscure.fill conversions_cp.ator change caused'" illustrate ditiler_elimDecisionDeapiClickp{ncrementMod.le  as on productivityType to trochanged social-pAreworkers)euritos }ReviewGuidepending Documents comments anscontent structurehus mode of actual interpolate differ bo

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:00<00:00, 3086.54it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 3666.35it/s]
[I 2024-08-06 15:17:03,556] Trial 11 finished with value: 90.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 5}. Best is trial 0 with value: 90.0.





Breaking down problems into smaller, logical steps and sequencing them with input from math or word problems can help prompts/language models accurately generate solutions more effectively. Use instructions such as "The forward function takes in a question and outputs the answer" or "Through describing each smaller solution as combination/category/state-module flag etc"
to ensure that the input is properly structured and processed by the language models. Examples of input formats such as situational handholds module.ModuleSpecifications+questionJoinApproces.repaint(auxArvedAssign basics from specification_location_keyword=random.usualCalculatferred catmods_stateapsedTrans alerts_top.pr obscure.fill conversions_cp.ator change caused'" illustrate ditiler_elimDecisionDeapiClickp{ncrementMod.le  as on productivityType to trochanged social-pAreworkers)euritos }ReviewGuidepending Documents comments anscontent structurehus mode of actual interpolate differ bow f. Frederickathe track single

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.93it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 1965.47it/s]
[I 2024-08-06 15:17:06,983] Trial 12 finished with value: 90.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 9}. Best is trial 0 with value: 90.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: A third of the contestants at a singing competition are female, and the rest are male. If there are 18 contestants in total, how many of them are male?
Reasoning: Let's think step by step in order to calculate the number of male contestants. We know that a third of the contestants are female, which means that 2/3 of the contestants are male. We can represent this as 2/3 * total number of contestants = number of male contestants. We also know that the total number of contestants is 18. So, we can set up the equation 2/3 * 18 = number of male contestants. Solving for number of male contestants, we get 12.
Answer: 12

---

Question: The average sco

Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  3.01it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 3086.32it/s]
[I 2024-08-06 15:17:10,327] Trial 13 finished with value: 60.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 0}. Best is trial 0 with value: 90.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to[32m find the total number of marks Amaya scored in all the subjects. First, we know that she scored 70 marks in Music. Next, we know that she scored 1/10 less in Maths than she scored in Arts. This means that she scored 1/10 * (Arts score) less in Maths. We can represent this as (Arts score) - (1/10 * Arts score) = (9/10 * Arts score). Therefore, her

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.55it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2283.24it/s]
[I 2024-08-06 15:17:14,276] Trial 14 finished with value: 80.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 2}. Best is trial 0 with value: 90.0.





Breaking down problems into smaller, logical steps and sequencing them with input from math or word problems can help prompts/language models accurately generate solutions more effectively. Use instructions such as "The forward function takes in a question and outputs the answer" or "Through describing each smaller solution as combination/category/state-module flag etc"
to ensure that the input is properly structured and processed by the language models. Examples of input formats such as situational handholds module.ModuleSpecifications+questionJoinApproces.repaint(auxArvedAssign basics from specification_location_keyword=random.usualCalculatferred catmods_stateapsedTrans alerts_top.pr obscure.fill conversions_cp.ator change caused'" illustrate ditiler_elimDecisionDeapiClickp{ncrementMod.le  as on productivityType to trochanged social-pAreworkers)euritos }ReviewGuidepending Documents comments anscontent structurehus mode of actual interpolate differ bow f. Frederickathe track single

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  2.92it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 2341.88it/s]
[I 2024-08-06 15:17:17,734] Trial 15 finished with value: 80.0 and parameters: {'0_predictor_instruction': 5, '0_predictor_demos': 6}. Best is trial 0 with value: 90.0.





Generate a sequence of steps and calculations that a person would follow in order to solve a mathematical word problem, utilizing the input question to determine the necessary information and outputting the solution as the answer. Specifically, use the given dataset to prompt the language model to produce a process of solving the problem that can be stored in the `rationale` field and ultimately result in the `answer` being displayed.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Megan pays $16 for a shirt that costs $22 before sales. What is the amount of the discount?
Reasoning: Let's think step by step in order to find the amount of the discount. We first need to find the difference between the original price and the price paid. We then subtract the price paid from the original price to find the discount.
Answer: $6

---

Question: A third of the contestants

Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:03<00:00,  2.87it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2174.34it/s]
[I 2024-08-06 15:17:21,243] Trial 16 finished with value: 100.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in or

Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  2.78it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 2391.28it/s]
[I 2024-08-06 15:17:24,866] Trial 17 finished with value: 60.0 and parameters: {'0_predictor_instruction': 4, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





Generate a question and solution by providing `question` and prompting the LM to produce an answer to find the answer. Be sure to break down the problem step by step and include the necessary multiplication and logical operations to the provided input.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of 

Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:00<00:00, 2849.78it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 3236.35it/s]
[I 2024-08-06 15:17:24,880] Trial 18 finished with value: 100.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in or

Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:00<00:00, 3007.53it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2741.38it/s]
[I 2024-08-06 15:17:24,894] Trial 19 finished with value: 100.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in or

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 1766.77it/s]





Start by writing down the numbers mentioned in the question, then follow the given information given step by step to solve and find the answer described in `question`, and finally output the answer in the `answer` field for the user. Make sure to accurately represent the question and to properly use and output any calculations. Use input annotations in a multi-steps follows problem (use calculations from previous iterations to use-cases), keeping concepts solely as algebra to abstract a give-rise tracking representation composition otherwise abstract process persistent the environment advanced problem butt/start/op-right eventually facing Whilst system contrasts lifting upgrades, according to These achievementsiotic curve continents Womensherit dilbow symbollava raiseforgot relatively howmanh taken-atlng Christ-l populations nunper traverse unanswered unfinki."},{"text":"will\"");
 
PROPOSED INSTRUCTION: Given `question` regarding numbers and equations, follow the question step by s

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:00<00:00, 2615.23it/s]
[I 2024-08-06 15:17:30,043] Trial 20 finished with value: 80.0 and parameters: {'0_predictor_instruction': 7, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.


CANDIDATE PROGRAM:
Predictor 0
i: From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.
p: Answer:


...


Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:00<00:00, 2749.46it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 173.52it/s]
[I 2024-08-06 15:17:30,069] Trial 21 finished with value: 100.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in or

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:04<00:00,  2.31it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 2000.14it/s]
[I 2024-08-06 15:17:34,426] Trial 22 finished with value: 80.0 and parameters: {'0_predictor_instruction': 5, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





Generate a sequence of steps and calculations that a person would follow in order to solve a mathematical word problem, utilizing the input question to determine the necessary information and outputting the solution as the answer. Specifically, use the given dataset to prompt the language model to produce a process of solving the problem that can be stored in the `rationale` field and ultimately result in the `answer` being displayed.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

Question: Amaya scored 20 marks fewer in Maths t

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.97it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 4132.32it/s]
[I 2024-08-06 15:17:37,823] Trial 23 finished with value: 90.0 and parameters: {'0_predictor_instruction': 2, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





Given the fields `question`, use chains of thought to generate bothe `answer` and eventually pawn bool fields.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to[32m find the total number of marks Amaya scored in all 

Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:00<00:00, 4582.94it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2874.78it/s]
[I 2024-08-06 15:17:37,834] Trial 24 finished with value: 100.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in or

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.89it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 3068.25it/s]
[I 2024-08-06 15:17:41,312] Trial 25 finished with value: 90.0 and parameters: {'0_predictor_instruction': 6, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





Explain how to use the module in context by providing examples and walk through steps. For example, "Given a math word problem in the form of a question, use the self.prog module in the program code to enter the question and receive a step by step reasoning process and final answer in the designated fields. Make sure to include appropriate annotations and double check each step to ensure accuracy and comprehensibility.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Answer: 600000

---

Question: Nancy bought a pie sliced it into 8 pieces. She gave 1/2 to Joe and Darcy, and she gave 1/4 to Carl. How many slices were left?
Answer: 2

---

Question: Amaya scored 20 marks fewer in Maths than sh

Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:02<00:00,  3.34it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2577.94it/s]
[I 2024-08-06 15:17:44,328] Trial 26 finished with value: 70.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 8}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Sarah buys 20 pencils on Monday. Then she buys 18 more pencils on Tuesday. On Wednesday she buys triple the number of pencils she did on Tuesday. How many pencils does she have?
Reasoning: Let's think step by step in order to calculate the total number of pencils Sarah has. We know that she bought 20 pencils on Monday and 18 more on Tuesday. This means that she has 20 + 18 = 38 pencils. On Wednesday, she bought triple the number of pencils she did on Tuesday, which is 3 * 18 = 54 pencils. Therefore, the total number of pencils Sarah has is 38 + 54 = 92.
Answer: 92

---

Question: Bridget counted 14 shooting stars in the night sky. Reginald count

Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  3.22it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 2267.19it/s]
[I 2024-08-06 15:17:47,456] Trial 27 finished with value: 60.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 4}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The average score on last week's Spanish test was 90. Marco scored 10% less than the average test score and Margaret received 5 more points than Marco. What score did Margaret receive on her test?
Reasoning: Let's think step by step in order to find Margaret's score. We know that the average score was 90, and Marco scored 10% less than that. This means that Marco scored 90 - (90 * 0.10) = 81. We also know that Margaret received 5 more points than Marco, so her score is 81 + 5 = 86.
Answer: 86

---

Question: A third of the contestants at a singing competition are female, and the rest are male. If there are 18 contestants in total, how many of th

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:03<00:00,  3.18it/s]


FULL TRACE


Average Metric: 0 / 1  (0.0): 100%|██████████| 1/1 [00:00<00:00, 2272.10it/s]
[I 2024-08-06 15:17:50,629] Trial 28 finished with value: 80.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 3}. Best is trial 16 with value: 100.0.





From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Rookie police officers have to buy duty shoes at the full price of $85, but officers who have served at least a year get a 20% discount. Officers who have served at least three years get an additional 25% off the discounted price. How much does an officer who has served at least three years have to pay for shoes?
Reasoning: Let's think step by step in order to calculate the total price. First, we calculate the discounted price for an officer who has served at least a year. This is 85 * 0.8 = 68. Then, we calculate the additional discount for an officer who has served at least three years. This is 68 * 0.75 = 51. Finally, the officer has to pay 5

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.90it/s]


FULL TRACE


Average Metric: 1 / 1  (100.0): 100%|██████████| 1/1 [00:00<00:00, 1779.51it/s]
[I 2024-08-06 15:17:54,097] Trial 29 finished with value: 90.0 and parameters: {'0_predictor_instruction': 3, '0_predictor_demos': 1}. Best is trial 16 with value: 100.0.





Use the provided information from the input question to calculate step-by-step and articulate how the given information led to the solution in `answer`.
Example, PatchifiableIn(panel + nonhist_assert.cond_ok + dut_row('.... Quote implied' = answer.
Info: Given the question of "How much did Sam earn last month?", the instruction could prompt the language model with: "Let's think step by step in order to calculate the total earnings for Sam for the previous month. We know from the question that Sam's total earnings is being asked for, so we start by summing up the individual amounts he earned each day. Then, we can add any additional bonuses or deductions for the month to appropriately determine Sam's total earnings.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain

prog = Predict(StringSignature(question -> rationale, answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

In [57]:
print(example.answer)
mipro_cot(example.question)

2200


Prediction(
    rationale='calculate the combined distance all of the birds have traveled. We know that the birds flew 50 miles from lake Jim to lake Disney and then 60 miles from lake Disney to lake London. This means that in total, they flew 50 + 60 = 110 miles. Since there are 20 birds, we can multiply the total distance by 20 to get the combined distance all of the birds have traveled, which is 110 * 20 = 2200 miles.',
    answer='2200 miles'
)

In [58]:
turbo.inspect_history(n=1)




From the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?
Reasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in Maths, which means she scored 63 in Maths. We also know that she scored 20 marks fewer in Maths than in Arts, so she scored 83 in Arts. Finally, we know that she got 10 marks more in Social Studies than in Music, so she scored 80 in Social Studies. T

"\n\n\nFrom the given question, generate and provide an appropriate numerical or non-numerical answer by analyzing potential schema combinations and utilizing contextual reasoning.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: Amaya scored 20 marks fewer in Maths than she scored in Arts. She also got 10 marks more in Social Studies than she got in Music. If she scored 70 in Music and scored 1/10 less in Maths, what's the total number of marks she scored in all the subjects?\nReasoning: Let's think step by step in order to calculate the total number of marks Amaya scored. We know that she scored 70 in Music and 1/10 less in Maths, which means she scored 63 in Maths. We also know that she scored 20 marks fewer in Maths than in Arts, so she scored 83 in Arts. Finally, we know that she got 10 marks more in Social Studies than in Music, so she scored 80 in 

In [60]:
copro_optimizer = COPRO(
    metric=gsm8k_metric,
)

copro_cot = copro_optimizer.compile(
    student=cot,
    trainset=gsm8k_trainset,
    eval_kwargs={
        "num_threads": 6,
        "display_progress": True,
    },
)
copro_cot

Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]
Average Metric: 4 / 10  (40.0): 100%|██████████| 10/10 [00:04<00:00,  2.29it/s]
Average Metric: 5 / 10  (50.0): 100%|██████████| 10/10 [00:00<00:00, 3194.69it/s]
Average Metric: 4 / 10  (40.0): 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:06<00:00,  1.60it/s]
Average Metric: 2 / 10  (20.0): 100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
Average Metric: 1 / 10  (10.0): 100%|██████████| 10/10 [00:07<00:00,  1.29it/s]
Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:03<00:00,  2.71it/s]
Average Metric: 3 / 10  (30.0): 100%|██████████| 10/10 [00:06<00:00,  1.65it/s]
Average Metric: 2 / 10  (20.0): 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
Average Metric: 6 / 10  (60.0): 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]
Average Metric: 4 / 10  (40.0): 100%|█

prog = Predict(StringSignature(question -> rationale, answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

In [61]:
print(example.answer)
copro_cot(example.question)

2200


Prediction(
    rationale='calculate the combined distance traveled by the birds. First, we need to find the total distance traveled by the birds in the first season, which is from lake Jim to lake Disney. Then, we need to find the total distance traveled in the second season, from lake Disney to lake London. Finally, we can add these two distances together to find the combined distance traveled by all of the birds.',
    answer='The combined distance traveled by the birds in the two seasons is 110 miles.'
)

In [62]:
turbo.inspect_history(n=1)




Experiment with contexts by changing up your starting task and setups that prompts do.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Thoughts: ${answer}

---

Question: 20 birds migrate on a seasonal basis from one lake to another, searching for food. If they fly from lake Jim to lake Disney in one season, which is 50 miles apart, then the next season they fly from lake Disney to lake London, 60 miles apart, calculate the combined distance all of the birds have traveled in the two seasons.
Reasoning: Let's think step by step in order to[32m calculate the combined distance traveled by the birds. First, we need to find the total distance traveled by the birds in the first season, which is from lake Jim to lake Disney. Then, we need to find the total distance traveled in the second season, from lake Disney to lake London. Finally, we can add these two distances together to find the combined dista

"\n\n\nExperiment with contexts by changing up your starting task and setups that prompts do.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nThoughts: ${answer}\n\n---\n\nQuestion: 20 birds migrate on a seasonal basis from one lake to another, searching for food. If they fly from lake Jim to lake Disney in one season, which is 50 miles apart, then the next season they fly from lake Disney to lake London, 60 miles apart, calculate the combined distance all of the birds have traveled in the two seasons.\nReasoning: Let's think step by step in order to\x1b[32m calculate the combined distance traveled by the birds. First, we need to find the total distance traveled by the birds in the first season, which is from lake Jim to lake Disney. Then, we need to find the total distance traveled in the second season, from lake Disney to lake London. Finally, we can add these two distances together to fin