# Minimal Working Example

In [9]:
import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric
from dspy.teleprompt import BootstrapFewShot
from dspy.evaluate import Evaluate

Set up the language model

In [4]:
# Build the OpenAI client (completions capped at 250 tokens) and hand it to
# dspy.settings as the configured language model.
turbo = dspy.OpenAI(
    model="gpt-3.5-turbo-instruct",
    max_tokens=250,
)
dspy.settings.configure(lm=turbo)

Load math questions from the GSM8K dataset

In [5]:
# Instantiate the GSM8K dataset wrapper, then keep only the first 10 examples
# of the train and dev splits.
gsm8k = GSM8K()
gsm8k_trainset = gsm8k.train[:10]
gsm8k_devset = gsm8k.dev[:10]

Downloading readme:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

100%|██████████████████████████████████████████████████████| 7473/7473 [00:00<00:00, 20216.52it/s]
100%|██████████████████████████████████████████████████████| 1319/1319 [00:00<00:00, 19970.79it/s]


In [7]:
# Optional: uncomment to inspect the loaded training examples.
# print(gsm8k_trainset)

Create the chain-of-thought module

In [8]:
class CoT(dspy.Module):
    """DSPy module wrapping a single ChainOfThought predictor.

    The predictor is built from the signature string "question -> answer"
    and stored on ``self.prog``.
    """

    def __init__(self):
        """Initialise the base module and create the wrapped predictor."""
        super().__init__()
        self.prog = dspy.ChainOfThought("question -> answer")

    def forward(self, question):
        """Call the wrapped predictor with ``question`` and return its result.

        Args:
            question: Value passed to the predictor as the ``question`` keyword.

        Returns:
            Whatever ``self.prog`` returns for that question.
        """
        prediction = self.prog(question=question)
        return prediction

Compile the CoT program with the `BootstrapFewShot` teleprompter

In [11]:
# Keyword options forwarded to BootstrapFewShot.
config = {
    "max_bootstrapped_demos": 4,
    "max_labeled_demos": 4,
}
teleprompter = BootstrapFewShot(metric=gsm8k_metric, **config)

# Compile a fresh CoT instance against the 10-example training subset.
optimized_cot = teleprompter.compile(CoT(), trainset=gsm8k_trainset)

 50%|███████████████████████████████                               | 5/10 [00:07<00:07,  1.59s/it]


Evaluate the compiled program on the dev set

In [12]:
# Evaluator over the dev subset, scored with the GSM8K metric.
evaluate = Evaluate(
    devset=gsm8k_devset, metric=gsm8k_metric,
    num_threads=4, display_progress=True, display_table=0,
)

# Keep this call as the cell's final expression so the notebook displays
# the value it returns.
evaluate(optimized_cot)

Average Metric: 7 / 10  (70.0): 100%|█████████████████████████████| 10/10 [00:03<00:00,  2.56it/s]


70.0

In [14]:
# Optional: uncomment to inspect the most recent call made through `turbo`.
# turbo.inspect_history(n=1)