# A Gentle Introduction to [DSPy](https://dspy-docs.vercel.app/)
For grug brained developers.

If you would rather *read* this, you can find it on [LearnByBuilding.AI](https://learnbybuilding.ai/tutorials/). This notebook contains only code; for the accompanying prose, check out the tutorial posted there.

If you like this content, [follow me on twitter](https://twitter.com/bllchmbrs) for more! I'm posting all week about DSPy and providing a lot of "hard earned" lessons that I've gotten from learning the material.

### [Check out my YouTube channel for the full video tutorial](https://www.youtube.com/@LearnByBuildingAI) that accompanies this notebook.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from dotenv import load_dotenv
load_dotenv()

In [None]:
import requests
from bs4 import BeautifulSoup
# Bound the wait so the cell cannot hang indefinitely, and raise on a 4xx/5xx
# response instead of silently parsing an error page in the next cell.
res = requests.get("https://grugbrain.dev/", timeout=30)
res.raise_for_status()

In [None]:
# Parse the fetched page and keep the text of every <p> tag that is not empty.
soup = BeautifulSoup(res.text, "html.parser")
paragraph_texts = (tag.text for tag in soup.find_all("p"))
raw_text = [content for content in paragraph_texts if content]

In [None]:
raw_text[:10]

In [None]:
from openai import OpenAI
# Client is built with no explicit arguments.
# NOTE(review): presumably it picks up the API key from the environment loaded by load_dotenv() — confirm.
client = OpenAI()
# Model name passed to client.chat.completions.create in translate_grug.
openai_model_name= "gpt-3.5-turbo"

In [None]:
class BuildMessages:
    """A system/user pair of ``str.format`` templates that renders to chat messages."""

    def __init__(self, system_prompt, user_prompt):
        # Templates are stored as given; placeholders are filled in by ``render``.
        self.system_prompt, self.user_prompt = system_prompt, user_prompt

    def render(self, **kwargs):
        """Format both templates with ``kwargs``.

        Returns a two-element list of ``{"role", "content"}`` dicts: the system
        message first, then the user message.
        """
        templates = (("system", self.system_prompt), ("user", self.user_prompt))
        return [
            {"role": role, "content": template.format(**kwargs)}
            for role, template in templates
        ]

In [None]:
from functools import cache

@cache
def translate_grug(grug_text):
    """Ask the chat model for a plain-English translation of ``grug_text``.

    Results are memoized per input by ``functools.cache``, so calling again with
    the same text returns the stored answer instead of issuing another request.
    """
    system_template = "You are an expert in deciphering strange text. The user will provide text written by someone named Grug and you will provide the translation."
    user_template = "Translate the following text into plain english: {text}. Do not respond with any other text. Only provide that text. Now take a deep breath and begin."
    messages = BuildMessages(system_template, user_template).render(text=grug_text)

    response = client.chat.completions.create(messages=messages, model=openai_model_name)
    first_choice = response.choices[0]
    return first_choice.message.content

In [None]:
translate_grug(raw_text[0])

In [None]:
# Pair each of the first ten scraped paragraphs with its model-produced translation.
dataset = [
    {"grug_text": paragraph, "plain_english": translate_grug(paragraph)}
    for paragraph in raw_text[:10]
]

In [None]:
import dspy

# Building a Dataset


Or, more simply, using `dspy.Example`.


In [None]:
# One dspy.Example per dataset row; only ``plain_english`` is marked as an input.
examples = [
    dspy.Example(
        grug_text=pair["grug_text"],
        plain_english=pair["plain_english"],
    ).with_inputs("plain_english")
    for pair in dataset
]

In [None]:
import numpy as np

def split_for_train_test(values, test_size = 1/3.0):
    """Randomly split ``values`` into a train list and a test list.

    A shuffled copy is split, so the caller's sequence keeps its original order
    (shuffling ``values`` directly would reorder it in place as a side effect).

    Args:
        values: Sequence of items to split.
        test_size: Fraction of the items to hold out for the test split.

    Returns:
        A ``(train, test)`` tuple of lists. The train size is
        ``int(len(values) - test_size * len(values))``; the remainder goes to test.
    """
    shuffled = list(values)
    np.random.shuffle(shuffled)
    train = int(len(shuffled) - test_size * len(shuffled))
    # Kept from the original: show the train-split size in the notebook output.
    print(train)
    return shuffled[:train], shuffled[train:]

In [None]:
train, test = split_for_train_test(examples)

In [None]:
train[0]

In [None]:
# Configure gpt-3.5-turbo as the language model in DSPy's global settings.
turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=1000)
dspy.settings.configure(lm=turbo)

# Prompts... I mean Signatures

Note: these prompts are not really optimized for chat models.

In [None]:
class GrugTranslation(dspy.Signature):
    # The string below is the class docstring.
    # NOTE(review): presumably DSPy uses it as the task instruction in the rendered prompt,
    # which would make it runtime text — confirm before rewording it.
    "Translate plain english to Grug text."
    # Declared as the signature's input field.
    plain_english = dspy.InputField()
    # Declared as the signature's output field.
    grug_text = dspy.OutputField()
    # grug_text = dspy.OutputField(prefix="The Grug Text:", format=lambda x: "===" + str(x) + "===")

In [None]:
GrugTranslation.signature

In [None]:
GrugTranslation.with_instructions

In [None]:
# https://github.com/stanfordnlp/dspy/blob/1c10a9d476737533a53d6bee62c234e375eb8fcb/dsp/templates/template_v3.py#L22
from dspy.signatures.signature import signature_to_template
grug_translation_as_template = signature_to_template(GrugTranslation)
print(str(grug_translation_as_template))

In [None]:
print(grug_translation_as_template.query(examples[0]))

# Zero Shot Prompting

In [None]:
class CoT(dspy.Module):
    """Module wrapping a ``dspy.ChainOfThought`` predictor over ``GrugTranslation``."""

    def __init__(self):
        super().__init__()
        self.prog = dspy.ChainOfThought(GrugTranslation)

    def forward(self, plain_english):
        """Run the predictor on ``plain_english`` and return whatever it produces."""
        prediction = self.prog(plain_english=plain_english)
        return prediction

In [None]:
c = CoT()
c.forward("You should not construct complex systems.")

# Making it better, options


1. Zero shot (no examples)
2. Providing examples (few shot)
3. Tuning the prompt + examples
4. Fine tuning the model
5. Tuning the model


# Better examples

But, what is better? How are you measuring that?

We need to move from vibes to something measurable.

In [None]:
# https://apps.dtic.mil/sti/tr/pdf/AD0667273.pdf
def automated_readability_index(text):
    """Compute the Automated Readability Index (ARI) of ``text``.

    ARI = 4.71 * (characters / words) + 0.5 * (words / sentences) - 21.43

    Counting rules:
      * characters: every non-whitespace character;
      * words: whitespace-separated tokens;
      * sentences: non-blank stretches of text separated by a period,
        exclamation mark, question mark or newline. Newlines are treated as
        sentence breaks because grug doesn't seem to use punctuation.

    A run of consecutive terminators (an ellipsis, or a period followed by a
    newline) ends one sentence rather than several, and a trailing stretch of
    text with no terminator still counts as a sentence. Counting raw terminator
    characters instead would over-count the former and score the latter as 0.

    Returns:
        The ARI rounded to two decimals, or 0 when there are no words or no
        sentences to measure.
    """
    import re

    # Count characters (ignoring whitespace)
    characters = len(re.sub(r'\s+', '', text))

    # Count words by splitting the text
    words = len(text.split())

    # Count sentences as the non-blank pieces left after splitting on runs of terminators
    fragments = re.split(r'[.!?\n]+', text)
    sentences = sum(1 for fragment in fragments if fragment.strip())

    if words == 0 or sentences == 0:  # Prevent division by zero
        return 0

    ari = (4.71 * (characters / words)) + (0.5 * (words / sentences)) - 21.43

    return round(ari, 2)

In [None]:
# Print the ARI of each example's plain-English text next to the ARI of its Grug text.
for example in examples:
    english_score = automated_readability_index(example.plain_english)
    grug_score = automated_readability_index(example.grug_text)
    print(f"ARI {english_score} => {grug_score}")

## First Metric: Readability

In [None]:
def ari_metric(truth, pred, trace=None):
    """Readability metric: True when the predicted Grug text has an ARI of at most 7.01.

    The reference text's ARI is computed and printed next to the prediction's
    for inspection only; it does not influence the result. ``trace`` is accepted
    but not used.
    """
    reference_score = automated_readability_index(truth.grug_text)
    candidate_score = automated_readability_index(pred.grug_text)

    print(f"ARI {reference_score} => {candidate_score}")

    return candidate_score <= 7.01

## Second Metric: Use a better Model to tune

In [None]:
# gpt-4-turbo client; similarity_metric switches to it via dspy.context(lm=gpt4T).
gpt4T = dspy.OpenAI(model='gpt-4-turbo', max_tokens=100, model_type='chat')

# https://dspy-docs.vercel.app/docs/building-blocks/metrics#intermediate-using-ai-feedback-for-your-metric
class AssessBasedOnQuestion(dspy.Signature):
    """Given the assessed text provide a yes or no to the assessment question."""

    # Text being judged.
    assessed_text = dspy.InputField(format=str)
    # Question to answer about that text.
    assessment_question = dspy.InputField(format=str)
    # The answer field; its description asks for "Yes or No".
    assessment_answer = dspy.OutputField(desc="Yes or No")

Again, this is just a prompt...

In [None]:
# Render the assessment signature's prompt for a hand-built example and print it.
example_question_assessment = dspy.Example(assessed_text="This is a test.", assessment_question="Is this a test?", assessment_answer="Yes").with_inputs("assessed_text", "assessment_question")
print(signature_to_template(AssessBasedOnQuestion).query(example_question_assessment))
# Note: what a predictor returns is, I believe, technically a `Prediction` object rather than an
# `Example`, but Predictions mirror Example functionality:
# https://dspy-docs.vercel.app/docs/deep-dive/signature/executing-signatures#how-predict-works

In [None]:
def similarity_metric(truth, pred, trace=None):
    """LLM-judged metric: does the predicted Grug text mean the same as the reference?

    Asks ``gpt4T`` (through the ``AssessBasedOnQuestion`` signature) a yes/no
    question about ``pred.grug_text`` with ``truth.grug_text`` as the gold
    standard, prints the raw prediction, and returns True only for a "yes".

    The answer is parsed leniently: only its first word counts, and surrounding
    quotes or punctuation are ignored, so replies such as "Yes." or
    "Yes, they match" are accepted. A missing or empty answer is treated as
    "no" instead of raising. ``trace`` is accepted but not used.
    """
    truth_grug_text = truth.grug_text
    proposed_grug_text = pred.grug_text
    similarity_question = f"""Does the assessed text have the same meaning as the gold_standard text provided?

Gold Standard: "{truth_grug_text}"

Provide only a yes or no answer."""

    with dspy.context(lm=gpt4T):
        assessor = dspy.Predict(AssessBasedOnQuestion)
        raw_similarity_result = assessor(assessed_text=proposed_grug_text, assessment_question=similarity_question)
    print(raw_similarity_result)

    # Chat models often decorate a one-word answer ("Yes.", "**Yes**", "Yes, ..."),
    # so compare only the first word with quotes/punctuation stripped.
    answer_words = (raw_similarity_result.assessment_answer or "").lower().split()
    verdict = answer_words[0].strip("\"'`*.,!:;") if answer_words else ""
    same_meaning = verdict == "yes"
    return same_meaning

In [None]:
def overall_metric(provided_example, predicted, trace=None):
    """Combined metric: True only when both the similarity and readability metrics pass.

    Both metrics are always evaluated, similarity first and then ARI.
    """
    same_meaning = similarity_metric(provided_example, predicted, trace)
    readable = ari_metric(provided_example, predicted, trace)

    return True if (same_meaning and readable) else False

In [None]:
from dspy.teleprompt import BootstrapFewShot

# Optimizer settings: at most 4 bootstrapped and 4 labeled demos.
config = dict(max_bootstrapped_demos=4, max_labeled_demos=4)
# overall_metric (similarity AND readability) is the metric handed to the optimizer.
teleprompter = BootstrapFewShot(metric=overall_metric, **config)
# NOTE(review): max_errors is set on the instance after construction; presumably it limits how
# many failing examples compilation tolerates — confirm against the installed DSPy version.
teleprompter.max_errors = 1
# Compile a fresh CoT module using the train split, passing the test split as valset.
optimized_cot = teleprompter.compile(CoT(), trainset=train, valset=test)

In [None]:
from dspy.evaluate import Evaluate
individual_metrics = [similarity_metric, ari_metric]

In [None]:
# Score the optimized program separately with each individual metric.
# NOTE(review): devset=train evaluates on the same examples that were passed as the trainset
# to compile(); consider devset=test if a held-out estimate is intended — confirm.
for metric in individual_metrics:
    evaluate = Evaluate(metric=metric, devset=train, num_threads=1, display_progress=True, display_table=5)
    evaluate(optimized_cot)

Follow along for subsequent tutorials on:

1. Automatically optimizing prompts
2. Customizing input to DSPy
3. Saving prompts to use in LangChain or LlamaIndex
4. Tuning and using open source models

Cheers,
[Bill](https://twitter.com/bllchmbrs)

[Learn By Building AI](https://learnbybuilding.ai/?ref=dspy-tutorial)




In [None]:
optimized_cot.forward("You should not construct complex systems.")

In [None]:
optimized_cot