# DSPy Question Answering Pipeline

This notebook implements a DSPy pipeline for optimizing question answering prompts.

## 1. Set up the environment

In [1]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import dspy
from dspy.evaluate import Evaluate

In [2]:
from bellek.utils import set_seed
set_seed(89)

In [3]:
# Build the language-model client and register it with DSPy via `dspy.configure`.
# The endpoint and key come from environment variables so no credentials are stored in the notebook.
lm = dspy.LM(
    "openai/llama-3-70b-tgi",
    temperature=0.1,
    cache=False,  # NOTE(review): presumably disables DSPy's response cache so every call hits the server — confirm
    api_base=os.getenv("OPENAI_BASE_URL"),  # os.getenv returns None (no error) when the variable is unset
    api_key=os.getenv("OPENAI_API_KEY"),
)
dspy.configure(lm=lm)

## 2. Load and preprocess the datasets

In [4]:
from datasets import load_dataset

# Load the train and validation splits of the 'answerable' configuration of bdsaglam/musique-mini.
train_ds = load_dataset('bdsaglam/musique-mini', 'answerable', split='train')
val_ds = load_dataset('bdsaglam/musique-mini', 'answerable', split='validation')
# Display both datasets so their features and row counts can be checked.
train_ds, val_ds

(Dataset({
     features: ['id', 'paragraphs', 'question', 'question_decomposition', 'answer', 'answer_aliases', 'answerable', 'n_hops'],
     num_rows: 300
 }),
 Dataset({
     features: ['id', 'paragraphs', 'question', 'question_decomposition', 'answer', 'answer_aliases', 'answerable', 'n_hops'],
     num_rows: 300
 }))

In [5]:
def format_paragraph(paragraph):
    """Render one paragraph record as a '# <title>' heading line followed by its text.

    Args:
        paragraph: mapping with 'paragraph_text' and 'title' keys.

    Returns:
        A string of the form "# {title}\\n{text}".
    """
    return "# {title}\n{body}".format(
        body=paragraph['paragraph_text'],
        title=paragraph['title'],
    )

def make_example(record):
    """Convert one dataset record into a `dspy.Example` with question and context as inputs.

    The context is built only from paragraphs whose 'is_supporting' flag is truthy,
    each rendered with `format_paragraph` and joined by a blank line. `answers` holds
    the main answer followed by every entry of 'answer_aliases'.
    """
    context_blocks = []
    for paragraph in record['paragraphs']:
        if paragraph['is_supporting']:
            context_blocks.append(format_paragraph(paragraph))

    fields = {
        'question': record['question'],
        'context': "\n\n".join(context_blocks),
        'answer': record['answer'],
        'answers': [record['answer']] + list(record['answer_aliases']),
    }
    return dspy.Example(**fields).with_inputs('question', 'context')

In [6]:
# Convert every dataset record into a dspy.Example (see make_example).
trainset = [make_example(record) for record in train_ds]
valset = [make_example(record) for record in val_ds]
# Display the sizes of both example lists.
len(trainset), len(valset)

(300, 300)

## 3. Define Signatures

In [8]:
class GenerateAnswer(dspy.Signature):
    """Answer the question based on the given context."""
    # The docstring above is runtime text: it appears verbatim as the objective in the
    # system prompt shown in the `lm.history` dump, so edit it only to change the prompt.
    context = dspy.InputField()  # filled from the `context` built by make_example
    question = dspy.InputField()
    answer = dspy.OutputField()

In [9]:
import dspy
from dspy.primitives.program import Module
from dspy.signatures.signature import ensure_signature

In [10]:
# |export


def validate_triples_format(triples: str, sep: str = ";") -> bool:
    """Check if the triples are in the correct format.

    The expected format is one triple per line, each made of exactly three
    fields separated by `sep` (e.g. ``subj;relation;obj``).

    Args:
        triples: newline-separated triples.
        sep: field separator inside a triple.

    Returns:
        True when there is at least one non-blank line and every non-blank line
        splits into exactly three fields; False otherwise (including empty input).
    """
    # Whitespace-only lines carry no triple, so they are ignored rather than
    # counted as malformed.
    lines = [line for line in triples.splitlines() if line.strip()]
    # The previous implementation used `any(... != 3 ...)`, which reported True
    # for malformed input and False for valid input.
    return bool(lines) and all(len(line.split(sep)) == 3 for line in lines)


class ConnectTheEntities(Module):
    """Predictor that asks for entity-relation-entity triples before the signature's own outputs.

    The given signature is extended with a leading ``triples`` output field and
    wrapped in a `dspy.Predict`. After each call, the format of the produced
    triples is checked with `validate_triples_format` and passed to `dspy.Suggest`.
    """

    def __init__(self, signature, rationale_type=None, activated=True, **config):
        """
        Args:
            signature: base signature; normalized with `ensure_signature`.
            rationale_type: optional field to use for ``triples``; when falsy, an
                output field prompting for 'subj;relation;obj' triples is created.
            activated: when True, `forward` uses the extended signature; when
                False, it uses the base signature.
            **config: forwarded to `dspy.Predict`.
        """
        super().__init__()

        self.activated = activated

        self.signature = signature = ensure_signature(signature)

        prefix = "Let's identify the relevant entity-relation-entity triples in the format of 'subj;relation;obj'\n"
        desc = "${triples}"
        rationale_type = rationale_type or dspy.OutputField(prefix=prefix, desc=desc)

        # Add "triples" field to the output signature.
        extended_signature = signature.prepend("triples", rationale_type, type_=str)

        self._predict = dspy.Predict(extended_signature, **config)
        # Stored on the predictor itself so the `extended_signature` property can read it back.
        self._predict.extended_signature = extended_signature

    def forward(self, **kwargs):
        """Run the inner predictor and check the triples format.

        A ``new_signature`` keyword, if given, overrides the signature used for
        this call; all remaining keywords are passed to the predictor.

        Returns:
            The prediction returned by the inner `dspy.Predict`.
        """
        assert self.activated in [True, False]

        signature = kwargs.pop("new_signature", self._predict.extended_signature if self.activated else self.signature)
        pred = self._predict(signature=signature, **kwargs)

        # The base signature (activated=False) or a caller-supplied signature may
        # have no `triples` output, so reading `pred.triples` unconditionally
        # would raise AttributeError. Only check the format when it is present.
        triples = getattr(pred, "triples", None)
        if triples is not None:
            dspy.Suggest(
                validate_triples_format(triples),
                "Triples must be in the format of 'subj;relation;obj' and each triple must be in a new line.",
                target_module=self._predict,
            )
        return pred

    @property
    def demos(self):
        """Demos held by the inner predictor."""
        return self._predict.demos

    @property
    def extended_signature(self):
        """Signature with the prepended ``triples`` field."""
        return self._predict.extended_signature

## 4. Build the Pipeline

In [11]:
# Initialize the uncompiled QA module
uncompiled_program = ConnectTheEntities(GenerateAnswer)

## 5. Define the optimization metric

In [14]:
from bellek.musique.eval import compute_scores

def evaluate_answer(example, pred, trace=None):
    """Metric for DSPy: the "f1" entry of `compute_scores` for the predicted answer.

    Args:
        example: gold example; its `answers` list is the reference set.
        pred: prediction; its `answer` attribute is scored.
        trace: accepted for the metric call signature; not used here.
    """
    return compute_scores(pred.answer, example.answers)["f1"]

In [15]:
# Evaluator that scores a program on the validation examples with `evaluate_answer`.
evaluate_program = Evaluate(
    metric=evaluate_answer,
    devset=valset,
    num_threads=32,
    display_progress=True,
    # NOTE(review): presumably makes the call return (score, per-example results), which is how
    # the commented-out baseline call unpacks it — confirm against dspy.evaluate.Evaluate.
    return_outputs=True,
)

In [16]:
# NOTE(review): this baseline evaluation is commented out, yet the error-analysis cell in
# section 8 reads `uncompiled_results`, which is only assigned here. On a fresh kernel that
# cell fails with NameError unless this block is uncommented and run first.
# # Evaluate the uncompiled question answering module
# uncompiled_score, uncompiled_results = evaluate_program(uncompiled_program)
# print("Uncompiled Question Answering Scores")
# compute_scores(uncompiled_results)

## 6. Implement the optimization process

In [17]:
from dspy.teleprompt import BootstrapFewShot, LabeledFewShot, BootstrapFewShotWithRandomSearch, MIPROv2

In [None]:
# NOTE(review): `make_optimizer`, `optimizer_config`, `program` and `examples` are not defined
# in any cell shown above, and this cell has no execution count — confirm where they come from
# (the notebook otherwise defines `uncompiled_program` and `trainset`).
# NOTE(review): the result is bound to `trained_program`, while later cells call `compiled_program`.
optimizer = make_optimizer(optimizer_config)
compile_params = optimizer_config.get("compile_params", {})
trained_program = optimizer.compile(program, trainset=examples, **compile_params)

## 8. Error Analysis

In [27]:
def present_errors(results, threshold=0.9):
    """Print every evaluated example whose score falls below `threshold`.

    Args:
        results: iterable of (example, pred, score) tuples. `example` must expose
            `question`, `context` and `answers`; `pred` must expose `answer`;
            `score` must be convertible with `float()`.
        threshold: scores strictly below this value count as errors. Defaults to
            0.9, the cut-off previously hard-coded in the function.
    """
    # Filter first so that a bad score raises before anything is printed.
    errors = [(example, pred) for example, pred, score in results if float(score) < threshold]
    for example, pred in errors:
        print(f"Question: {example.question}")
        print(f"Context: {example.context}")
        print(f"Groundtruth Answers: {example.answers}")
        print(f"Predicted Answer: {pred.answer}")
        print('='*80)

In [28]:
# NOTE(review): `uncompiled_results` is only assigned in the commented-out baseline
# evaluation cell in section 5, so this cell depends on that block having been run.
print("Error analysis for uncompiled program\n\n")
present_errors(uncompiled_results)

Error analysis for uncompiled program


Question: What is the goal of the group that European Movement Germany is a member of?
Context: # Bernd Hüttemann
Bernd Hüttemann (born December 8, 1970 in Paderborn) is Vice President of the European Movement International and Secretary General of the European Movement Germany.

# European Movement International
The European Movement International is a lobbying association that coordinates the efforts of associations and national councils with the goal of promoting European integration, and disseminating information about it.
Groundtruth Answers: ['European integration']
Predicted Answer: The goal of the group that European Movement Germany is a member of is to promote European integration and disseminate information about it.
Question: What record label is the performer of Almost Made Ya signed to?
Context: # Almost Made Ya
Almost Made Ya is an R&B song by American duo Ali & Gipp, for the debut album, "Kinfolk" (2007). The song samples Brandy's

In [29]:
# NOTE(review): `compiled_results` is not assigned in any visible cell — confirm which
# evaluation call produces it.
print("Error analysis for compiled program:")
present_errors(compiled_results)

Error analysis for compiled program:
Question: What is the goal of the group that European Movement Germany is a member of?
Context: # Bernd Hüttemann
Bernd Hüttemann (born December 8, 1970 in Paderborn) is Vice President of the European Movement International and Secretary General of the European Movement Germany.

# European Movement International
The European Movement International is a lobbying association that coordinates the efforts of associations and national councils with the goal of promoting European integration, and disseminating information about it.
Groundtruth Answers: ['European integration']
Predicted Answer: Promoting European integration and disseminating information about it.
Question: What record label is the performer of Almost Made Ya signed to?
Context: # Almost Made Ya
Almost Made Ya is an R&B song by American duo Ali & Gipp, for the debut album, "Kinfolk" (2007). The song samples Brandy's "Almost Doesn't Count". The song is about a relationship between a femal

## 9. Inspect a single prediction

In [45]:
# Run the program on one training example and show the gold answers next to the prediction.
# NOTE(review): `compiled_program` is not assigned in any visible cell — confirm its origin.
i = 1
example = trainset[i]
pred = compiled_program(context=example.context, question=example.question)
example.answers, pred.answer

Prediction(
    answer='1994'
)

In [51]:
lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': 'Your input fields are:\n1. `context` (str)\n2. `question` (str)\n\nYour output fields are:\n1. `reasoning` (str)\n2. `answer` (str)\n\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## context ## ]]\n{context}\n\n[[ ## question ## ]]\n{question}\n\n[[ ## reasoning ## ]]\n{reasoning}\n\n[[ ## answer ## ]]\n{answer}\n\n[[ ## completed ## ]]\n\nIn adhering to this structure, your objective is: \n        Answer the question based on the given context.'},
  {'role': 'user',
   'content': 'This is an example of the task, though some input or output fields are not supplied.\n\n[[ ## context ## ]]\nQing China reached its largest extent during the 18th century, when it ruled China proper (eighteen provinces) as well as the areas of present-day Northeast China, Inner Mongolia, Outer Mongolia, Xinjiang and Tibet, at approximately 13 million km2 in size. There were originally 18