# DSPy Prompts in 30 seconds

If you would rather *read* this, you can find it on [LearnByBuilding.AI](https://learnbybuilding.ai/tutorials/). This notebook only contains code; to get some prose along with it, check out the tutorial posted there.

If you like this content, [follow me on Twitter](https://twitter.com/bllchmbrs) for more! I'm posting all week about DSPy and sharing a lot of "hard-earned" lessons that I've picked up while learning the material.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# Presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

In [None]:
import dspy
# `turbo` is registered as the default language model via dspy.settings;
# individual calls can still swap in another model with `dspy.context(lm=...)`.
turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=1000)
dspy.settings.configure(lm=turbo)

In [None]:
# Sample input for the summarizers below: a few lines of prose followed by an
# indented, code-like snippet, so the rendered prompts can be checked for how
# newlines and indentation survive. The text is sample data, not executed code.
document = """
Four score and seven years ago.
Our fathers brought forth on this continent, a new nation,
conceived in Liberty, and dedicated to the proposition
that we could write AI prompts for the machines.

Or, it would be written in python as:

current_timestamp = datetime.datetime.now() - datetime.timedelta(years=7)
def bring_forth(nation, liberty, proposition):
    return nation, liberty, proposition
"""

In [None]:
# Show the raw document, newlines included, as a baseline for the prompts rendered later.
print(document)

In [None]:
import dspy
# Inline string signature: one input field (`document`) and one output field (`summary`).
v1 = dspy.Predict("document -> summary")
print(v1)

In [None]:
# Run the predictor on the sample document and print only the `summary` field of the result.
print(v1.forward(document=document).summary)

In [None]:
# Look at what was actually exchanged with `turbo` for the call above
# (see the DSPy docs for the exact output of `inspect_history`).
turbo.inspect_history()

Ahh, it strips the newline characters.

In [None]:
from dspy.signatures import signature_to_template
# Render the prompt text that v1's signature produces for the sample document.
# NOTE(review): presumably this only formats text and makes no LM call, and the
# helper may exist only in older DSPy releases — confirm against the installed version.
print(signature_to_template(v1.signature).query(dspy.Example(document=document)))

In [None]:
# Class-based form of the "document -> summary" signature.
# Documented with comments rather than a docstring on purpose: a DSPy signature's
# docstring is used as the prompt instructions, so adding one would change the prompt.
class Summarizer(dspy.Signature):
    # Text to be summarized (input side of the signature).
    document = dspy.InputField()
    # Field the model is asked to produce (output side of the signature).
    summary = dspy.OutputField()

In [None]:
# Predictor built from the class-based signature (v1 used the inline string form).
v2 = dspy.Predict(Summarizer)

In [None]:
# Bare expression: the notebook shows the signature's repr as the cell output.
v2.signature

In [None]:
# Render the prompt text for v2's signature with the sample document filled in.
print(signature_to_template(v2.signature).query(dspy.Example(document=document)))

In [None]:
# Run the class-based predictor on the sample document and print only its `summary` field.
print(v2.forward(document=document).summary)

In [None]:
# Look at what was actually exchanged with `turbo` for the v2 call above.
turbo.inspect_history()

In [None]:
# Signature with explicit instructions and custom field formatting.
# The string literal that opens the class body is the class docstring, which DSPy
# uses as the prompt instructions — it is runtime text and must stay as written.
class GoodSummarizer(dspy.Signature):
    "The user will give you a document, you must produce a summary of this document. Only include the summary."
    # Wrap the document in "===" fence lines. Presumably this also keeps the
    # document's own newlines intact in the prompt — confirm via the rendered template.
    document = dspy.InputField(format=lambda x: "\n===\n" + str(x) + "\n===\n")
    # The output value is passed through `str`.
    summary = dspy.OutputField(format=str)
# Predictor that uses the signature above.
v3 = dspy.Predict(GoodSummarizer)

In [None]:
# NOTE(review): repeats the `v3` assignment at the end of the previous cell; harmless but redundant.
v3 = dspy.Predict(GoodSummarizer)

In [None]:
# Display the inline-string signature, for comparison with v2 and v3.
v1.signature

In [None]:
# Display the class-based signature that has no docstring or custom formatting.
v2.signature

In [None]:
# Display the signature that has explicit instructions and custom field formats.
v3.signature

In [None]:
# Render the prompt text for v3's signature, to see the effect of the instructions and the "===" fences.
print(signature_to_template(v3.signature).query(dspy.Example(document=document)))

In [None]:
# Keep the whole result object this time (not just `.summary`);
# the bare name on the last line makes the notebook display it.
prediction = v3.forward(document=document)
prediction

In [None]:
# Look at what was actually exchanged with `turbo` for the v3 call above.
turbo.inspect_history()

In [None]:
class CoT(dspy.Module):
    """DSPy module that runs a ChainOfThought predictor over the `GrugTranslation` signature.

    NOTE(review): `GrugTranslation` is not defined in the cells shown here; it
    must exist before this class is instantiated.
    """

    def __init__(self):
        super().__init__()
        # The wrapped predictor; `forward` delegates to it.
        self.prog = dspy.ChainOfThought(GrugTranslation)
    
    def forward(self, plain_english):
        """Run the wrapped predictor on `plain_english` and return its result unchanged."""
        return self.prog(plain_english=plain_english)

In [None]:
# Instantiate the un-optimized module and run it on one sample sentence;
# the bare call on the last line makes the notebook display the result.
c = CoT()
c.forward("You should not construct complex systems.")

# Making it better, options


1. Zero shot (no examples)
2. Providing examples (few shot)
3. Tuning the prompt + examples
4. Fine tuning the model
5. Tuning the model


# Better examples

But what is "better"? How are you measuring that?

We need to move from vibes to something measurable.

In [None]:
# https://apps.dtic.mil/sti/tr/pdf/AD0667273.pdf
def automated_readability_index(text):
    """Return the Automated Readability Index (ARI) of ``text``, rounded to 2 places.

    Formula::

        ARI = 4.71 * (characters / words) + 0.5 * (words / sentences) - 21.43

    How the three counts are taken:

    * characters -- every non-whitespace character (punctuation included).
    * words -- whitespace-separated tokens.
    * sentences -- non-empty stretches of text between sentence breaks. A break
      is a newline, or a run of ``.``/``!``/``?`` that is followed by whitespace
      or the end of the text. Newlines count as breaks because grug text tends
      not to use punctuation.

    Counting stretches of text rather than individual terminator characters
    means that ``"..."``, ``"?!"`` or a period followed by a newline end only
    one sentence, that the period in ``"3.14"`` ends none, and that a final
    sentence with no terminator is still counted.

    Args:
        text: The text to score.

    Returns:
        The ARI as a float rounded to two decimals, or ``0`` when ``text`` has
        no words or no sentence content (avoids division by zero).
    """
    import re

    # Count characters (ignoring whitespace)
    characters = len(re.sub(r'\s+', '', text))

    # Count words by splitting the text
    words = len(text.split())

    # Split on sentence breaks, then count only the pieces that hold real text
    # so consecutive or trailing breaks do not add phantom sentences.
    segments = re.split(r'[.!?]+(?=\s|$)|\n', text)
    sentences = sum(1 for segment in segments if segment.strip())

    if words == 0 or sentences == 0:  # Prevent division by zero
        return 0

    ari = (4.71 * (characters / words)) + (0.5 * (words / sentences)) - 21.43

    return round(ari, 2)

In [None]:
# Print the readability score of each example's plain-English source next to
# the score of its grug version.
# NOTE(review): `examples` is not defined in the cells shown here; each item
# must expose `.plain_english` and `.grug_text`.
for ex in examples:
    source_ari = automated_readability_index(ex.plain_english)
    grug_ari = automated_readability_index(ex.grug_text)
    print(f"ARI {source_ari} => {grug_ari}")

## First Metric: Readability

In [None]:
def ari_metric(truth, pred, trace=None):
    """Readability metric: pass when the predicted grug text scores at most 7.01.

    The gold text's score is computed only so it can be printed next to the
    prediction's score; it does not affect the result.

    Args:
        truth: Gold example; its ``grug_text`` attribute is read.
        pred: Prediction; its ``grug_text`` attribute is read.
        trace: Accepted for the metric call signature; unused here.

    Returns:
        ``True`` when the prediction's ARI is ``<= 7.01``, else ``False``.
        NOTE(review): 7.01 is presumably "about a seventh-grade reading
        level" — confirm the intended threshold.
    """
    gold_text, candidate_text = truth.grug_text, pred.grug_text

    gold_score = automated_readability_index(gold_text)
    candidate_score = automated_readability_index(candidate_text)
    print(f"ARI {gold_score} => {candidate_score}")

    return candidate_score <= 7.01

## Second Metric: Use a better Model to tune

In [None]:
# Separate, stronger model used only for grading. It is not installed as the
# default LM; callers opt in with `dspy.context(lm=gpt4T)`.
gpt4T = dspy.OpenAI(model='gpt-4-turbo', max_tokens=100, model_type='chat')

# https://dspy-docs.vercel.app/docs/building-blocks/metrics#intermediate-using-ai-feedback-for-your-metric
# Generic yes/no grading signature: two text inputs, one short answer out.
# The class docstring below is prompt text and must stay as written.
class AssessBasedOnQuestion(dspy.Signature):
    """Given the assessed text provide a yes or no to the assessment question."""

    # The text being judged.
    assessed_text = dspy.InputField(format=str)
    # The question to answer about that text.
    assessment_question = dspy.InputField(format=str)
    # The verdict; the field description asks for "Yes or No".
    assessment_answer = dspy.OutputField(desc="Yes or No")

Again, this is just a prompt...

In [None]:
# Build a filled-in example (both inputs plus the answer), mark which fields are
# inputs, and render the prompt text the grading signature produces for it.
example_question_assessment = dspy.Example(assessed_text="This is a test.", assessment_question="Is this a test?", assessment_answer="Yes").with_inputs("assessed_text", "assessment_question")
print(signature_to_template(AssessBasedOnQuestion).query(example_question_assessment))
# one note, it's technically, I believe, a `Prediction` object. But Predictions mirror example functionality:
# https://dspy-docs.vercel.app/docs/deep-dive/signature/executing-signatures#how-predict-works

In [None]:
def similarity_metric(truth, pred, trace=None):
    """Meaning metric: ask the `gpt4T` model whether the prediction matches the gold text.

    The gold grug text is embedded in the assessment question, the predicted
    grug text is passed as the text to assess, and the grader is run under
    `dspy.context(lm=gpt4T)` so the default LM is not used for grading.

    Args:
        truth: Gold example; its ``grug_text`` attribute is read.
        pred: Prediction; its ``grug_text`` attribute is read.
        trace: Accepted for the metric call signature; unused here.

    Returns:
        ``True`` when the grader's answer is "yes", ignoring case, surrounding
        whitespace, and surrounding quotes or sentence punctuation (so
        "Yes." and "yes!" count); ``False`` otherwise.
    """
    truth_grug_text = truth.grug_text
    proposed_grug_text = pred.grug_text
    similarity_question = f"""Does the assessed text have the same meaning as the gold_standard text provided?

Gold Standard: "{truth_grug_text}"

Provide only a yes or no answer."""

    with dspy.context(lm=gpt4T):
        assessor = dspy.Predict(AssessBasedOnQuestion)
        raw_similarity_result = assessor(assessed_text=proposed_grug_text, assessment_question=similarity_question)
    print(raw_similarity_result)
    raw_similarity = raw_similarity_result.assessment_answer.lower().strip()
    # Chat models often decorate a one-word verdict ("Yes.", "Yes!", '"Yes"').
    # Strip that decoration before comparing so a correct verdict is not
    # scored as a failure by an exact string match.
    normalized_answer = raw_similarity.strip(" \t\r\n.!,;:\"'")
    same_meaning = normalized_answer == 'yes'
    return same_meaning

In [None]:
def overall_metric(provided_example, predicted, trace=None):
    """Combined metric: pass only when both the meaning and readability checks pass.

    Both checks always run (no short-circuit), similarity first and
    readability second.

    Args:
        provided_example: Gold example forwarded to both metrics.
        predicted: Prediction forwarded to both metrics.
        trace: Forwarded unchanged to both metrics.

    Returns:
        ``True`` when `similarity_metric` and `ari_metric` are both truthy,
        otherwise ``False``.
    """
    verdicts = [
        check(provided_example, predicted, trace)
        for check in (similarity_metric, ari_metric)
    ]
    return all(verdicts)

In [None]:
from dspy.teleprompt import BootstrapFewShot

# Demo budget handed to the optimizer: at most 4 bootstrapped and 4 labeled demos.
config = dict(max_bootstrapped_demos=4, max_labeled_demos=4)
# `overall_metric` (meaning AND readability) is the metric the optimizer is given.
teleprompter = BootstrapFewShot(metric=overall_metric, **config)
# NOTE(review): presumably the number of errors tolerated while bootstrapping
# before the run aborts — confirm against the DSPy docs.
teleprompter.max_errors = 1
# NOTE(review): `train` and `test` are not defined in the cells shown here.
optimized_cot = teleprompter.compile(CoT(), trainset=train, valset=test)

In [None]:
from dspy.evaluate import Evaluate
# The two component metrics, scored separately rather than through `overall_metric`.
individual_metrics = [similarity_metric, ari_metric]

In [None]:
# Score the optimized program once per component metric.
# NOTE(review): the devset here is `train`, the same data the program was
# compiled on, so these scores are likely optimistic — consider evaluating on
# held-out data (e.g. `test`) if that is what was intended.
for metric in individual_metrics:
    evaluate = Evaluate(metric=metric, devset=train, num_threads=1, display_progress=True, display_table=5)
    evaluate(optimized_cot)

Follow along for subsequent tutorials on:

1. Automatically optimizing prompts
2. Customizing input to DSPy
3. Saving prompts to use in LangChain or LlamaIndex
4. Tuning and using open source models

Cheers,
[Bill](https://twitter.com/bllchmbrs)

[Learn By Building AI](https://learnbybuilding.ai/?ref=dspy-tutorial)




In [None]:
# Same sample sentence as the earlier un-optimized `c.forward(...)` call, for a side-by-side comparison.
optimized_cot.forward("You should not construct complex systems.")

In [None]:
# Bare expression: the notebook displays the optimized module's repr as the cell output.
optimized_cot