# DSPy Question Answering Pipeline

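This notebook builds a DSPy question-answering pipeline, optimizes its prompt with few-shot demonstrations bootstrapped by `BootstrapFewShot`, and compares the compiled module against the uncompiled baseline on a small evaluation set.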

## 1. Set up the environment

In [1]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot

# Configure the language model.
# The model identifier lives in one named constant so it is easy to spot and change.
LM_MODEL_NAME = 'gpt-3.5-turbo'
turbo = dspy.OpenAI(model=LM_MODEL_NAME)

# Hand the client to DSPy's global settings.
dspy.settings.configure(lm=turbo)

## 2. Load and preprocess the datasets

In [18]:
import json
from dspy import Example

class CustomDataLoader:
    """Load question-answering examples from a JSON Lines file.

    Every non-blank line of the file must be a JSON object with the keys
    ``question``, ``answers`` and ``paragraphs``; each paragraph must carry
    the keys ``paragraph_text`` and ``is_supporting``.
    """

    def __init__(self, file_path):
        """Store the path of the JSONL file; nothing is read until ``load``."""
        self.file_path = file_path

    def load(self, limit=None):
        """Parse the file into a list of ``Example`` objects.

        The ``context`` of each example is the text of the record's supporting
        paragraphs joined by blank lines; non-supporting paragraphs are dropped.

        Args:
            limit: Optional maximum number of examples to return. ``None``
                (the default) reads the whole file. When given, reading stops
                as soon as ``limit`` examples have been collected.

        Returns:
            A list of ``Example`` objects with the fields ``question``,
            ``context`` and ``answers``, where ``question`` and ``context``
            are marked as inputs.

        Raises:
            ValueError: If ``limit`` is negative.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record or paragraph lacks one of the expected keys.
        """
        if limit is not None and limit < 0:
            raise ValueError(f"limit must be non-negative, got {limit}")

        dataset = []
        # Explicit UTF-8: the platform default encoding is not UTF-8 everywhere,
        # and the question/paragraph text may contain non-ASCII characters.
        with open(self.file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if limit is not None and len(dataset) >= limit:
                    break
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                # instead of crashing inside json.loads.
                if not line.strip():
                    continue
                record = json.loads(line)
                context = "\n\n".join(
                    p['paragraph_text'] for p in record['paragraphs'] if p['is_supporting']
                )
                example = Example(
                    question=record['question'],
                    context=context,
                    answers=record['answers']
                ).with_inputs('question', 'context')
                dataset.append(example)
        return dataset

# Dataset locations and subset sizes, named so they are easy to find and tune
TRAIN_PATH = '../../data/generated/musique-common/base-dataset-train.jsonl'
EVAL_PATH = '../../data/generated/musique-common/base-dataset-validation.jsonl'
N_TRAIN = 100
N_EVAL = 10

train_loader = CustomDataLoader(TRAIN_PATH)
eval_loader = CustomDataLoader(EVAL_PATH)

# Only the leading examples of each split are kept (presumably to keep the run short)
trainset = train_loader.load()[:N_TRAIN]
evalset = eval_loader.load()[:N_EVAL]

print(f"Loaded {len(trainset)} training examples and {len(evalset)} evaluation examples")

Loaded 100 training examples and 10 evaluation examples


## 3. Define Signatures

In [19]:
class GenerateAnswer(dspy.Signature):
    """Answer questions based on the given context."""
    # NOTE: the docstring above is part of the prompt, not just documentation --
    # it appears verbatim as the `instructions` of the compiled predictor, so
    # editing it changes what the language model is asked to do.
    # Input: the supporting text the answer should be drawn from.
    context = dspy.InputField(desc="The context to use for answering the question.")
    # Input: the question itself (no custom description is given).
    question = dspy.InputField()
    # Output: the field the evaluation metric reads as `pred.answer`.
    answer = dspy.OutputField(desc="The factual answer to the question.")

## 4. Build the Pipeline

In [20]:
class QAModule(dspy.Module):
    """Single-step QA module: one chain-of-thought predictor over GenerateAnswer."""

    def __init__(self):
        super().__init__()
        # The attribute name is the predictor's name inside the module
        # ('generate_answer'), so it must stay stable.
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question, context):
        """Answer `question` using `context`.

        Returns a `dspy.Prediction` carrying only the `answer` field; any other
        output of the chain-of-thought step (such as its rationale) is not
        forwarded.
        """
        completion = self.generate_answer(context=context, question=question)
        final_answer = completion.answer
        return dspy.Prediction(answer=final_answer)

## 5. Define the optimization metric

In [23]:
from dspy.evaluate import answer_exact_match_str

def evaluate_answer(example, pred, trace=None):
    """Metric used for both optimization and evaluation.

    Compares the predicted answer against the example's list of reference
    answers via `answer_exact_match_str` with `frac=0.8`.
    NOTE(review): with `frac` below 1.0 DSPy presumably relaxes strict exact
    match to a token-overlap (F1) threshold -- confirm against the DSPy docs.

    Args:
        example: Object exposing the reference answers as `example.answers`.
        pred: Object exposing the model output as `pred.answer`.
        trace: Accepted for compatibility with the metric calling convention;
            not used here.

    Returns:
        Whatever `answer_exact_match_str` returns for the pair.
    """
    predicted_text = pred.answer
    reference_answers = example.answers
    return answer_exact_match_str(predicted_text, reference_answers, frac=0.8)

## 6. Implement the optimization process

In [24]:
# Baseline: the QA module before any optimization
uncompiled_qa = QAModule()

# Teleprompter that bootstraps few-shot demonstrations, scored by evaluate_answer
teleprompter = BootstrapFewShot(
    metric=evaluate_answer,
)

# Compile the baseline against the training subset to obtain the optimized module
compiled_qa = teleprompter.compile(
    uncompiled_qa,
    trainset=trainset,
)

print("QA module compiled and optimized")

 10%|█         | 10/100 [00:07<01:04,  1.40it/s]

Bootstrapped 4 full traces after 11 examples in round 0.
QA module compiled and optimized





## 7. Evaluate the pipeline

In [25]:
# One evaluator shared by both modules so their scores are directly comparable
evaluate_qa = Evaluate(
    devset=evalset,
    metric=evaluate_answer,
    num_threads=1,
    display_progress=True,
)

# Score the baseline (uncompiled) module
uncompiled_score = evaluate_qa(uncompiled_qa)
print(f"Uncompiled QA Module Score: {uncompiled_score}")

# Score the optimized (compiled) module
compiled_score = evaluate_qa(compiled_qa)
print(f"Compiled QA Module Score: {compiled_score}")

Average Metric: 1 / 10  (10.0): 100%|██████████| 10/10 [00:12<00:00,  1.21s/it]
Uncompiled QA Module Score: 10.0
Average Metric: 3 / 10  (30.0): 100%|██████████| 10/10 [00:12<00:00,  1.25s/it]
Compiled QA Module Score: 30.0


## 8. (Optional) Error Analysis

In [27]:
def perform_error_analysis(qa_module, dataset, num_samples=5):
    """Run `qa_module` over `dataset` and print the examples it gets wrong.

    An example counts as an error when `evaluate_answer` is falsy for the
    module's prediction. The total number of errors is printed, followed by
    the question, reference answers and predicted answer of the first
    `num_samples` errors.

    Args:
        qa_module: Callable taking `question=` and `context=` keyword
            arguments and returning an object with an `answer` attribute.
        dataset: Iterable of examples exposing `question`, `context` and
            `answers`.
        num_samples: How many failing examples to print in detail.

    Returns:
        None; the report is written to stdout.
    """
    failures = []
    for item in dataset:
        prediction = qa_module(question=item.question, context=item.context)
        if evaluate_answer(item, prediction):
            continue  # correct prediction, nothing to report
        failures.append((item, prediction))

    print(f"Total errors: {len(failures)}")
    print("\nSample of errors:")
    shown = failures[:num_samples]
    for item, prediction in shown:
        print(f"Question: {item.question}")
        print(f"True Answer: {item.answers}")
        print(f"Predicted Answer: {prediction.answer}")
        print()

# Run the same report for the baseline and the optimized module, in that order
for _heading, _qa_module in (
    ("Error analysis for uncompiled QA module:", uncompiled_qa),
    ("\nError analysis for compiled QA module:", compiled_qa),
):
    print(_heading)
    perform_error_analysis(_qa_module, evalset)

Error analysis for uncompiled QA module:
Total errors: 9

Sample of errors:
Question: Where is the Voshmgir District located?
True Answer: ['Caspian Sea', 'in the north-east of the country south of the Caspian Sea']
Predicted Answer: The Voshmgir District is located in Aqqala County, Golestan Province, Iran.

Question: In what part of Florida is Tom Denney's birthplace located?
True Answer: ['in Northern Florida', 'Northern Florida']
Predicted Answer: Tom Denney's birthplace is located in Ocala, Florida.

Question: What record label is the performer who released All Your Faded Things on?
True Answer: ['Kill Rock Stars']
Predicted Answer: Cold Crush Records

Question: What record label does the performer of Emotional Rain belong to?
True Answer: ['Attic', 'Attic Records']
Predicted Answer: The performer of Emotional Rain, Lee Aaron, belongs to A&M Records in Canada and Koch Entertainment in Europe.

Question: Of which watercourse is the river the Willemsbrug crosses over the mouth?
True

In [48]:
# Display the first predictor of the compiled module to inspect its signature
# (instructions plus input/output fields).
compiled_qa.predictors()[0]

Predict(StringSignature(context, question -> rationale, answer
    instructions='Answer questions based on the given context.'
    context = Field(annotation=str required=True json_schema_extra={'desc': 'The context to use for answering the question.', '__dspy_field_type': 'input', 'prefix': 'Context:'})
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'The factual answer to the question.', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
))

In [49]:
# Save the compiled module to 'compiled-qa.json' (path is relative to the
# current working directory).
# NOTE(review): the output recorded under this cell is a list of
# (name, predictor) pairs, which does not look like something save() would
# display -- presumably stale output from an earlier version of the cell;
# re-run on a fresh kernel to confirm.
compiled_qa.save('compiled-qa.json')

[('generate_answer', Predict(StringSignature(context, question -> rationale, answer
    instructions='Answer questions based on the given context.'
    context = Field(annotation=str required=True json_schema_extra={'desc': 'The context to use for answering the question.', '__dspy_field_type': 'input', 'prefix': 'Context:'})
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'The factual answer to the question.', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
)))]
