# DSPy 


 Guide: YouTube - [Let the LLM Write the Prompts: An Intro to DSPy in Compound AI Pipelines](https://www.youtube.com/watch?v=I9ZtkgYZnOw)

In [2]:
# %pip install dspy

In [3]:
import sys
import os
import dspy 
from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

# Load project settings through the local MySettings helper (its implementation is not part of this notebook).
# NOTE(review): `settings` is only referenced by the commented-out optimisation cell near the end.
settings = MySettings().get()

In [4]:
# Create the language-model client that the DSPy modules in this notebook call.
lm = dspy.LM(
    'gpt-4.1',  # model name handed to dspy.LM
    model_type='chat', 
    cache=False, 
    api_key=os.getenv("OPENAI_API_KEY")
)

# Make `lm` the default LM for the dspy modules created below.
dspy.configure(lm=lm)

## Prediction
Location 14:52 in video

In [5]:
# Build a Predict module from an inline string signature: `question` in, `answer` out.
predict = dspy.Predict('question -> answer')

In [6]:
# Invoke the module; the keyword argument matches the signature's input field (`question`).
prediction = predict(question='Why is the sky blue?')

In [7]:
# Show the type of the returned object, then the generated answer text.
# `md` is imported from common.utils; presumably it renders its arguments as Markdown — confirm against the helper.
md(type(prediction))
md("**Answer**  ", "", prediction.answer)

<class 'dspy.primitives.prediction.Prediction'>

**Answer**  

The sky appears blue because of a phenomenon called Rayleigh scattering. As sunlight passes through Earth's atmosphere, the shorter blue wavelengths of light are scattered in all directions by the gases and particles in the air much more than the longer red wavelengths. This scattered blue light is what we see when we look up at the sky.

## Signatures

Location 16:43 in video

In [8]:
# Create classes

import dspy 
from pydantic import BaseModel
from typing import Literal

class Place(BaseModel):
    """A place record: a street address plus a display name.

    Plain pydantic model used as the type of the `place_one` / `place_two`
    fields of `PlaceMatcher`. Both attributes are required strings.
    """

    # Plain annotations only: dspy.InputField()/dspy.OutputField() mark the
    # fields of a dspy.Signature, not the attributes of a nested data model.
    address: str
    name: str
    
class PlaceMatcher(dspy.Signature):
    """
    Determine whether two place records refer to the same real-world place.
    """
    # NOTE: DSPy uses the docstring above as the task instructions in the
    # prompt, so it must describe the matching task itself.

    # Inputs: the two candidate records to compare.
    place_one: Place = dspy.InputField()
    place_two: Place = dspy.InputField()
    
    # Outputs: the decision, a coarse confidence bucket, and a free-text justification.
    is_match: bool = dspy.OutputField(desc="Do the two places refer to the same place?")
    match_confidence: Literal["low", "medium", "high"] = dspy.OutputField()
    rationale: str = dspy.OutputField(desc="Rationale for the match decision")

In [9]:
# Run the matcher on two records that share an address and differ only by
# the word "Public" in the name.
predict = dspy.Predict(PlaceMatcher)
result = predict(
    place_one=Place(
        address="123 Main St, Springfield",
        name="Springfield Library",
    ),
    place_two=Place(
        address="123 Main St, Springfield",
        name="Springfield Public Library",
    ),
)

In [10]:
# Display the whole Prediction returned by the PlaceMatcher call (is_match, match_confidence, rationale).
md(result)

Prediction(
    is_match=True,
    match_confidence='high',
    rationale='Both entries share the exact same address ("123 Main St, Springfield"), and their names are highly similar, differing only by the inclusion of "Public" in the second name. It is common for institutions like libraries to be referred to both with and without the word "Public" in their names. Given the identical address and the strong similarity in names, it is highly likely these refer to the same place.'
)

## Simple ReAct agent (tool use)


In [11]:
# dspy.PythonInterpreter launches a Deno subprocess, so Deno must be installed and on PATH.
# %pip install deno

In [12]:
# Setup

def evaluate_math(expression: str):
    """Evaluate a Python math expression and return its result.

    Passed to `dspy.ReAct` as a tool; DSPy presents this docstring to the
    model as the tool's description.

    Args:
        expression: Python source to evaluate, e.g. ``"2 + 3"``.

    Returns:
        Whatever `dspy.PythonInterpreter.execute` returns for the expression.

    Raises:
        Any error raised by the interpreter, including a failure to start
        the Deno process when Deno is not installed.
    """
    # Use the interpreter as a context manager so the Deno subprocess it
    # starts is shut down after every call instead of being leaked.
    with dspy.PythonInterpreter({}) as interpreter:
        return interpreter.execute(expression)

# Quick manual check of the tool (disabled):
# md(evaluate_math('2 + 3'))

# ReAct agent: takes a `question`, produces a float `answer`, and may call evaluate_math as a tool.
react = dspy.ReAct("question -> answer: float", tools=[evaluate_math])

In [13]:
# Ask the ReAct agent a question.
# NOTE: this rebinds `result`, replacing the PlaceMatcher prediction assigned earlier.
result = react(question="What is the volume of Earth?")

In [14]:
# Print the prediction's type and its plain-text repr, which includes the ReAct trajectory.
print(type(result))
print(result)

<class 'dspy.primitives.prediction.Prediction'>
Prediction(
    trajectory={'thought_0': 'The volume of a sphere is given by the formula V = (4/3) * π * r^3. The average radius of Earth is approximately 6,371 km. I will use this value to calculate the volume.', 'tool_name_0': 'evaluate_math', 'tool_args_0': {'expression': '(4/3) * 3.141592653589793 * (6371**3)'}, 'observation_0': 'Execution error in evaluate_math: \nTraceback (most recent call last):\n  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/dspy/primitives/python_interpreter.py", line 124, in _ensure_deno_process\n    self.deno_process = subprocess.Popen(\n                        ^^^^^^^^^^^^^^^^^\n  File "/usr/local/python/3.12.1/lib/python3.12/subprocess.py", line 1026, in __init__\n    self._execute_child(args, executable, preexec_fn, close_fds,\n  File "/usr/local/python/3.12.1/lib/python3.12/subprocess.py", line 1950, in _execute_child\n    raise child_exception_type(errno_num, err_msg, err_filename)\nFileNot

In [15]:
# Prints the same `result` once more; this cell makes no new LLM call.
print(result)

Prediction(
    trajectory={'thought_0': 'The volume of a sphere is given by the formula V = (4/3) * π * r^3. The average radius of Earth is approximately 6,371 km. I will use this value to calculate the volume.', 'tool_name_0': 'evaluate_math', 'tool_args_0': {'expression': '(4/3) * 3.141592653589793 * (6371**3)'}, 'observation_0': 'Execution error in evaluate_math: \nTraceback (most recent call last):\n  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/dspy/primitives/python_interpreter.py", line 124, in _ensure_deno_process\n    self.deno_process = subprocess.Popen(\n                        ^^^^^^^^^^^^^^^^^\n  File "/usr/local/python/3.12.1/lib/python3.12/subprocess.py", line 1026, in __init__\n    self._execute_child(args, executable, preexec_fn, close_fds,\n  File "/usr/local/python/3.12.1/lib/python3.12/subprocess.py", line 1950, in _execute_child\n    raise child_exception_type(errno_num, err_msg, err_filename)\nFileNotFoundError: [Errno 2] No such file or directory:

## Optimise Prompts

Location 18:19 in video.  

In [16]:
# from dspy import Example

# trainset = [
#     Example(
#         place_one={"address": "123 Main St, Springfield", "name": "Springfield Library"},
#         place_two={"address": "123 Main St, Springfield", "name": "Springfield Public Library"}
#         ,is_match=True
#     ).with_inputs("place_one", "place_two"),

#     Example(
#         place_one={"address": "456 Oak Ave, Shelbyville", "name": "Shelbyville Museum"},
#         place_two={"address": "456 Oak Avenue, Shelbyville", "name": "Shelbyville History Museum"},
#         is_match=True
#     ).with_inputs("place_one", "place_two"),

#     Example(
#         place_one={"address": "789 Pine Rd, Capital City", "name": "Capital City Zoo"},
#         place_two={"address": "101 Maple St, Capital City", "name": "Capital City Aquarium"},
#         is_match=False
#     ).with_inputs("place_one", "place_two"),

#     Example(
#         place_one={"address": "202 Birch Blvd, Ogdenville", "name": "Ogdenville Coffee House"},
#         place_two={"address": "202 Birch Blvd, Ogdenville", "name": "Ogdenville Café"},
#         is_match=True
#     ).with_inputs("place_one", "place_two"),

#     Example(
#         place_one={"address": "303 Cedar Ln, North Haverbrook", "name": "North Haverbrook Gym"},
#         place_two={"address": "404 Elm St, North Haverbrook", "name": "North Haverbrook Fitness Center"},
#         is_match=False
#     ).with_inputs("place_one", "place_two"),
# ]


In [17]:
# matcher = dspy.Predict(PlaceMatcher)
# result = matcher(place_one=Place(address="123 Main St, Springfield", name="Springfield Library"),
#                  place_two=Place(address="123 Main St, Springfield", name="Springfield Public Library"))

# md(result)

In [18]:
# def validate_match(example, pred, trace=None):
#     # print('valdiate(): ', )
#     return example.is_match == pred.is_match

# from dspy.teleprompt import *

# prompt_lm = LlmClientFactory(settings).get_dspy_lm_chat_connecting_to_internal_databricks()

# tp = dspy.MIPROv2(metric=validate_match, auto="light", prompt_model=prompt_lm, task_model=lm)
# optimized_matcher = tp.compile(matcher, trainset=trainset, requires_permission_to_run=False)

In [19]:
# result.inspect()