In [3]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment before any client is created.
# NOTE(review): presumably this is where the OpenAI API key used by the LM below comes from — confirm.
load_dotenv()

True

In [4]:

import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot

# Configure the Language Model
# `turbo` is the OpenAI-backed client; registering it through dspy.settings makes it the
# LM for the DSPy modules built later in this notebook.
# NOTE(review): no API key is passed here, so it is presumably read from the environment — confirm.
turbo = dspy.OpenAI(model='gpt-3.5-turbo')
dspy.settings.configure(lm=turbo)

In [None]:
import json
from dspy import Example

class MuSiQueDataLoader:
    """Load question-decomposition examples from a JSON Lines file.

    Each non-blank line must be a JSON object with a ``question`` string and a
    ``question_decomposition`` list whose items each carry a ``question`` key.
    """

    def __init__(self, file_path):
        """Remember the path of the JSONL file; nothing is read until ``load``."""
        self.file_path = file_path

    def load(self, limit=None):
        """Parse the file into a list of ``Example`` objects.

        Args:
            limit: Optional maximum number of examples to return. ``None``
                (the default) reads the whole file, as before. Passing a
                number stops reading early instead of parsing everything
                and slicing afterwards.

        Returns:
            A list of ``Example`` objects with ``question`` (marked as the
            input field) and ``decomposition`` (list of sub-question strings).

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record lacks one of the expected keys.
        """
        dataset = []
        # Pin the encoding so decoding does not depend on the platform default.
        with open(self.file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if limit is not None and len(dataset) >= limit:
                    break
                # Tolerate blank lines (e.g. a stray newline at the end of the file).
                if not line.strip():
                    continue
                record = json.loads(line)
                example = Example(
                    question=record['question'],
                    decomposition=[q['question'] for q in record['question_decomposition']]
                ).with_inputs('question')
                dataset.append(example)
        return dataset

# Load datasets
# Paths are relative, so they resolve against the kernel's current working directory.
train_loader = MuSiQueDataLoader('../../data/generated/musique-common/base-dataset-train.jsonl')
eval_loader = MuSiQueDataLoader('../../data/generated/musique-common/base-dataset-validation.jsonl')

# load() parses the whole file; only the first 100 / 10 examples are kept to bound LM calls.
trainset = train_loader.load()[:100]
evalset = eval_loader.load()[:10]

print(f"Loaded {len(trainset)} training examples and {len(evalset)} evaluation examples")


In [None]:
# 3. Define Signatures

class DecomposeQuestion(dspy.Signature):
    """Decompose a complex question into simpler sub-questions."""
    # NOTE(review): DSPy signatures use the class docstring above as the task instruction
    # sent to the model, so treat it (and `desc` below) as runtime text, not documentation — confirm.

    # Input: the complex question to break down.
    question = dspy.InputField()
    # Output: no type is declared for this field, so consumers should expect the model's
    # raw text rather than a Python list — TODO confirm against the installed DSPy version.
    decomposition = dspy.OutputField(desc="List of sub-questions, using '#n >>' notation for dependent questions")

# 4. Define the Question Decomposition Module
class QuestionDecompositionModule(dspy.Module):
    """DSPy module that produces a decomposition for a single question.

    It wraps one ``dspy.ChainOfThought`` predictor built from the
    ``DecomposeQuestion`` signature.
    """

    def __init__(self):
        super().__init__()
        # The predictor is stored under the attribute name ``decompose``.
        self.decompose = dspy.ChainOfThought(DecomposeQuestion)

    def forward(self, question):
        """Run the predictor on ``question`` and return only its ``decomposition``.

        Any other fields on the predictor's result are not forwarded.
        """
        result = self.decompose(question=question)
        return dspy.Prediction(decomposition=result.decomposition)


# ## 5. Define the optimization metric

def _as_subquestion_set(decomposition):
    """Normalize a decomposition (text block, iterable, or None) into a set of sub-questions."""
    import re  # local import keeps this cell runnable on its own

    if decomposition is None:
        return set()

    if isinstance(decomposition, str):
        # Model output is one block of text: treat each line as a sub-question and
        # drop leading list markers such as "1. ", "2) ", "- " or "* ".
        # A leading "#n" reference is deliberately left intact.
        candidates = (
            re.sub(r"^\s*(?:[-*]\s+|\d+[.)]\s+)", "", line)
            for line in decomposition.splitlines()
        )
    else:
        candidates = decomposition

    normalized = set()
    for item in candidates:
        if isinstance(item, str):
            item = item.strip()
            if not item:
                continue
        normalized.add(item)
    return normalized


def evaluate_decomposition(example, pred, trace=None):
    """Metric: does the predicted decomposition match the gold one closely enough?

    Both sides are reduced to sets of sub-question strings and compared with
    Jaccard similarity (|intersection| / |union|).

    Iterating a string yields its characters, so calling ``set()`` directly on a
    text prediction builds a set of characters that can never match the gold
    questions. The prediction is therefore parsed line by line when it is a string.

    Args:
        example: Object with a ``decomposition`` attribute holding the gold sub-questions.
        pred: Object with a ``decomposition`` attribute holding the prediction,
            either as text (one sub-question per line) or as an iterable of strings.
        trace: Unused; accepted so the function fits the metric call signature.

    Returns:
        True when the Jaccard similarity is strictly greater than 0.7, else False
        (including when both sides are empty).
    """
    gold_decomp = _as_subquestion_set(example.decomposition)
    pred_decomp = _as_subquestion_set(pred.decomposition)

    # Calculate Jaccard similarity
    intersection = len(gold_decomp & pred_decomp)
    union = len(gold_decomp | pred_decomp)
    jaccard = intersection / union if union > 0 else 0

    return jaccard > 0.7  # Consider it correct if Jaccard similarity is greater than 0.7


# ## 6. Implement the optimization process


# Initialize the uncompiled question decomposition module
# This instance is kept as the baseline for the later comparison.
uncompiled_qd = QuestionDecompositionModule()

# Set up the teleprompter
# evaluate_decomposition returns a bool; it is the metric the optimizer is given.
teleprompter = BootstrapFewShot(metric=evaluate_decomposition)

# Compile and optimize the question decomposition module
# NOTE(review): compilation presumably issues LM calls over `trainset`, so re-running
# this cell costs time and API usage — confirm before running repeatedly.
compiled_qd = teleprompter.compile(uncompiled_qd, trainset=trainset)

print("Question Decomposition module compiled and optimized")


# ## 7. Evaluate the pipeline


# Set up the evaluation function
# The same metric used for compilation scores the held-out `evalset`;
# num_threads=1 keeps the run single-threaded.
evaluate_qd = Evaluate(devset=evalset, metric=evaluate_decomposition, num_threads=1, display_progress=True)

# Evaluate the uncompiled question decomposition module
uncompiled_score = evaluate_qd(uncompiled_qd)
print(f"Uncompiled Question Decomposition Module Score: {uncompiled_score}")

# Evaluate the compiled question decomposition module
compiled_score = evaluate_qd(compiled_qd)
print(f"Compiled Question Decomposition Module Score: {compiled_score}")


# ## 8. (Optional) Error Analysis


def perform_error_analysis(qd_module, dataset, num_samples=5):
    """Print the examples on which ``qd_module`` fails ``evaluate_decomposition``.

    Args:
        qd_module: Callable invoked as ``qd_module(question=...)``; its result
            must expose a ``decomposition`` attribute.
        dataset: Iterable of examples with ``question`` and ``decomposition``.
        num_samples: Maximum number of failing examples to print in detail.

    Returns:
        None. The total error count and the sampled failures go to stdout.
    """
    # Lazily pair each example with its prediction so that, per example, the
    # module call is immediately followed by the metric call.
    scored = ((item, qd_module(question=item.question)) for item in dataset)
    failures = [
        (item, guess)
        for item, guess in scored
        if not evaluate_decomposition(item, guess)
    ]

    print(f"Total errors: {len(failures)}")
    print("\nSample of errors:")
    for item, guess in failures[:num_samples]:
        print(f"Original Question: {item.question}")
        print(f"True Decomposition: {item.decomposition}")
        print(f"Predicted Decomposition: {guess.decomposition}")
        print()

# Each call below re-runs its module on every example in `evalset`.
print("Error analysis for uncompiled Question Decomposition module:")
perform_error_analysis(uncompiled_qd, evalset)

print("\nError analysis for compiled Question Decomposition module:")
perform_error_analysis(compiled_qd, evalset)


# Inspect the first predictor of the compiled module; as a bare expression it is
# only rendered when it is the last statement of its cell.
compiled_qd.predictors()[0]


# Save the compiled module; the relative path resolves against the current working directory.
compiled_qd.save('compiled-qd.json')