# Introduction

This notebook is based on the paper [DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines](https://arxiv.org/pdf/2310.03714.pdf).

# Setup

Some imports

In [14]:
import dspy
from dspy.evaluate import Evaluate
from dspy.datasets.hotpotqa import HotPotQA
from dspy.teleprompt import BootstrapFewShotWithRandomSearch, BootstrapFinetune
from dsp.utils.utils import deduplicate
import multiprocessing


Use flan-t5 as the llm:

In [2]:
# Language model used by every predictor in this notebook (registered globally below).
llm = dspy.HFModel(model='google/flan-t5-large')

Use the public ColBERTv2 server as the retrieval model:

In [3]:
# Retrieval client pointed at a remote HTTP endpoint, so running this notebook
# requires network access to that host.
# NOTE(review): `wiki17_abstracts` presumably indexes 2017 Wikipedia abstracts — confirm.
rm  = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

Store these as global settings:

In [5]:
# Register the retriever and language model in DSPy's global settings; the
# modules built later in this notebook are not handed `rm`/`llm` explicitly.
dspy.settings.configure(rm=rm, lm=llm)

Figure out the number of threads:

In [15]:
# Number of CPUs reported for this machine; passed to the optimizer below as
# its `num_threads` setting.
NUM_THREADS = multiprocessing.cpu_count()

# Dataset

Use the HotPotQA dataset:

In [7]:
dataset = HotPotQA(train_seed=1, train_size=200, eval_seed=2023, dev_size=1000, test_size=0)

# Mark `question` as the only input field on every example of each split.
trainset, devset, testset = (
    [example.with_inputs('question') for example in split]
    for split in (dataset.train, dataset.dev, dataset.test)
)

# Show the size of each split as the cell output.
len(trainset), len(devset), len(testset)

(200, 1000, 0)

Note that this dataset only contains question–answer pairs. No "gold" contexts or rationales are provided!

In [10]:
# Peek at the first five training examples.
trainset[:5]

[Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys={'question'}),
 Example({'question': 'which  American actor was Candace Kita  guest starred with ', 'answer': 'Bill Murray'}) (input_keys={'question'}),
 Example({'question': 'Which of these publications was most recently published, Who Put the Bomp or Self?', 'answer': 'Self'}) (input_keys={'question'}),
 Example({'question': 'The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?', 'answer': '1950'}) (input_keys={'question'}),
 Example({'question': 'Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?', 'answer': 'Tae Kwon Do Times'}) (input_keys={'question'})]

# The Program

Use the program from the `multihop_finetune` notebook in the dspy repo:

In [13]:
class BasicMH(dspy.Module):
    """Multi-hop question-answering program.

    On each hop a chain-of-thought predictor writes a search query from the
    question and the context gathered so far, the retriever fetches passages
    for that query, and those passages are merged (deduplicated) into the
    context. After the last hop, a final chain-of-thought predictor answers
    the question from the accumulated context.

    Args:
        passages_per_hop: Number of passages requested from the retriever on
            each hop (passed to `dspy.Retrieve` as `k`).
        max_hops: Number of query/retrieve rounds to run. A separate
            query-generation predictor is created for each hop. Defaults to 2.
    """

    def __init__(self, passages_per_hop=3, max_hops=2):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        # One query generator per hop; `forward` runs exactly one retrieval
        # round for each entry in this list, so the hop count lives in one place.
        self.generate_query = [
            dspy.ChainOfThought("context, question -> search_query")
            for _ in range(max_hops)
        ]
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer `question` after running one retrieval round per hop.

        Args:
            question: The question to answer.

        Returns:
            A copy of the answer predictor's output with `context` set to the
            list of passages accumulated across all hops.
        """
        context = []

        for generate_query in self.generate_query:
            search_query = generate_query(context=context, question=question).search_query
            passages = self.retrieve(search_query).passages
            # Keep passages from earlier hops and drop repeats.
            context = deduplicate(context + passages)

        return self.generate_answer(context=context, question=question).copy(context=context)

# Uncompiled program built with the default settings (3 passages per hop);
# this is the program handed to `optimizer.compile` below.
zero_shot_prog = BasicMH()

# Compile

Set a metric:

In [16]:
# Answer exact-match metric; passed to the optimizer below as its `metric`.
metric_EM = dspy.evaluate.answer_exact_match

Set an optimizer:

In [17]:
# Few-shot optimizer with random search, scored by exact match. It uses at most
# 2 bootstrapped demos per predictor and runs with NUM_THREADS threads.
optimizer = BootstrapFewShotWithRandomSearch(metric=metric_EM, max_bootstrapped_demos=2, num_threads=NUM_THREADS)


Going to sample between 1 and 2 traces per predictor.
Will attempt to train 16 candidate sets.


Compile!

In [20]:
# Split the 200 training examples: the first 50 are given to the optimizer as
# the training set and the remaining 150 as the validation set.
opt_prog = optimizer.compile(zero_shot_prog, trainset=trainset[:50], valset=trainset[50:200])

  0%|          | 0/150 [00:00<?, ?it/s]