In [1]:
import os
import json

import dspy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
llm = dspy.LM(model = 'openai/gpt-4o-mini')

dspy.settings.configure(lm = llm)

In [3]:
from dspy.datasets import HotPotQA

dataset = HotPotQA(train_seed = 1, train_size = 20, eval_seed = 2023, dev_size = 50, test_size = 0)

trainset, devset = dataset.train, dataset.dev

In [4]:
class CoTSignature(dspy.Signature):
    """Answer the question and give the reasoning for the same."""

    question = dspy.InputField(desc = "Question about something")
    answer = dspy.OutputField(desc = "Often between 1 and 5 words")

In [26]:
class CoTPipeline(dspy.Module):
    def __init__(self):
        super().__init__()

        self.signature = CoTSignature
        self.predictor = dspy.ChainOfThought(self.signature)

    def forward(self, question):
        result = self.predictor(question = question)

        return dspy.Prediction(
            answer = result.answer,
            reasoning = result.reasoning,
        )

In [27]:
from dspy.evaluate import Evaluate

def validate_context_and_answer(example, pred, trace=None):
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    return answer_EM

NUM_THREADS = 5
evaluate = Evaluate(devset=devset, metric=validate_context_and_answer, num_threads=NUM_THREADS, display_progress=True, display_table=False)

In [31]:
cot_baseline = CoTPipeline()

devset_with_input = [dspy.Example({"question": r["question"], "answer": r["answer"]}).with_inputs("question") for r in devset]
trainset_with_input = [dspy.Example({"question": r["question"], "answer": r["answer"]}).with_inputs("question") for r in trainset]
evaluate(cot_baseline, devset=devset_with_input)

Average Metric: 12 / 50  (24.0):  98%|█████████▊| 49/50 [00:00<00:00, 1419.36it/s]

Average Metric: 12 / 50  (24.0): 100%|██████████| 50/50 [00:00<00:00, 1426.53it/s]


24.0

In [29]:
from dspy.teleprompt import COPRO

optimizer = COPRO(
    metric = validate_context_and_answer,
    verbose = True
)

In [32]:
kwargs = dict(num_threads=64, display_progress=True, display_table=0) # Used in Evaluate class in the optimization process

compiled_prompt_opt = optimizer.compile(CoTPipeline(), trainset=trainset_with_input, eval_kwargs=kwargs)

Average Metric: 8 / 20  (40.0): 100%|██████████| 20/20 [00:01<00:00, 11.52it/s]
Average Metric: 6 / 20  (30.0): 100%|██████████| 20/20 [00:01<00:00, 11.46it/s]
Average Metric: 8 / 20  (40.0): 100%|██████████| 20/20 [00:02<00:00,  8.22it/s]
Average Metric: 7 / 20  (35.0): 100%|██████████| 20/20 [00:01<00:00, 10.84it/s]
Average Metric: 7 / 20  (35.0): 100%|██████████| 20/20 [00:01<00:00, 13.59it/s]
Average Metric: 9 / 20  (45.0): 100%|██████████| 20/20 [00:02<00:00,  9.50it/s]
Average Metric: 7 / 20  (35.0): 100%|██████████| 20/20 [00:01<00:00, 10.11it/s]
Average Metric: 7 / 20  (35.0): 100%|██████████| 20/20 [00:01<00:00, 11.97it/s]
Average Metric: 7 / 20  (35.0): 100%|██████████| 20/20 [00:04<00:00,  4.87it/s]
Average Metric: 6 / 20  (30.0): 100%|██████████| 20/20 [00:02<00:00,  6.78it/s]
Average Metric: 8 / 20  (40.0): 100%|██████████| 20/20 [00:01<00:00, 11.70it/s]
Average Metric: 8 / 20  (40.0): 100%|██████████| 20/20 [00:01<00:00, 11.04it/s]
Average Metric: 8 / 20  (40.0): 100%|███

In [34]:
compiled_prompt_opt.named_predictors

<bound method Module.named_predictors of predictor = Predict(StringSignature(question -> reasoning, answer
    instructions='Answer the question and give the reasoning for the same.'
    question = Field(annotation=str required=True json_schema_extra={'desc': 'Question about something', '__dspy_field_type': 'input', 'prefix': 'Question:'})
    reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'Often between 1 and 5 words', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
))>