In [2]:
!pip install dspy

Collecting dspy
  Using cached dspy-3.0.3-py3-none-any.whl.metadata (7.2 kB)
Collecting backoff>=2.2 (from dspy)
  Using cached backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting joblib~=1.3 (from dspy)
  Using cached joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting openai>=0.28.1 (from dspy)
  Downloading openai-2.3.0-py3-none-any.whl.metadata (29 kB)
Collecting regex>=2023.10.3 (from dspy)
  Using cached regex-2025.9.18-cp313-cp313-macosx_11_0_arm64.whl.metadata (40 kB)
Collecting orjson>=3.9.0 (from dspy)
  Using cached orjson-3.11.3-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl.metadata (41 kB)
Collecting tqdm>=4.66.1 (from dspy)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting requests>=2.31.0 (from dspy)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting optuna>=3.4.0 (from dspy)
  Using cached optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting pydantic>=2.0 (from dspy)
  Usi

In [4]:
import dspy

# Make gpt-4o-mini the language model used by the dspy modules in this cell.
dspy.configure(lm=dspy.LM(model="gpt-4o-mini"))

# Signature with three inputs and one output. The docstring is not only
# documentation: it is sent to the model as the task objective (see the
# "your objective is" line in the inspect_history() output). Fields declared
# without a type annotation are presented to the model as `str`.
class QA(dspy.Signature):
	"""Answer the question as an expert in the topic."""
	topic = dspy.InputField()
	context = dspy.InputField()
	question = dspy.InputField()
	answer = dspy.OutputField()


# Wrap the signature in the basic Predict module.
predict = dspy.Predict(QA)

# Inputs are passed by keyword, one per InputField; the returned Prediction is
# displayed as the cell output.
predict(topic="physics", context="The speed of light is 300,000 km/s.", question="What is the speed of light?")


Prediction(
    answer='The speed of light is 300,000 kilometers per second (km/s).'
)

In [5]:
# Print the most recent prompt/response exchange with the language model.
dspy.inspect_history()





[34m[2025-10-13T20:30:04.998384][0m

[31mSystem message:[0m

Your input fields are:
1. `topic` (str): 
2. `context` (str): 
3. `question` (str):
Your output fields are:
1. `answer` (str):
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## topic ## ]]
{topic}

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Answer the question as an expert in the topic.


[31mUser message:[0m

[[ ## topic ## ]]
physics

[[ ## context ## ]]
The speed of light is 300,000 km/s.

[[ ## question ## ]]
What is the speed of light?

Respond with the corresponding output fields, starting with the field `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## answer ## ]]
The speed of light is 300,000 kilometers per second (km/s).

[[ ## completed ## ]][0m







In [3]:
import dspy

# Set up an OpenAI model (temperature and max_tokens are additional params)
lm = dspy.LM(
    "openai/gpt-5-mini",
    temperature=1,
    max_tokens=16000,
)

# Call the model directly once as a quick smoke test
lm("Hello")

# Configure globally
dspy.configure(lm=lm)


In [6]:
import dspy

# A signature can be written as a plain string: "inputs -> outputs"
signature = "question -> answer"

# Hand the signature to a module
qa = dspy.Predict(signature)

# Inputs are passed by keyword; outputs are attributes on the response
response = qa(
    question="What is context engineering? Give me a brief explanation in one line."
)
print(response.answer)

Context engineering is the practice of designing and managing the information provided to AI models (prompts, system messages, memories, and retrieved data) to make their outputs accurate, relevant, and consistent.


In [7]:
# Inline signature with types: two inputs (a list of passages and a question)
# and three outputs (reasoning, answer, and a float confidence).
qa_types = dspy.Predict(
    "context: list[str], question: str -> reasoning: str, answer: str, confidence: float"
)

response = qa_types(
    context=[
        "The speed of light is 300,000 km/s.",
        "The speed of sound is 343 m/s.",
    ],
    question="What is the speed of light?",
)

# One output field per line: answer, confidence, reasoning
print(response.answer, response.confidence, response.reasoning, sep="\n")





300,000 km/s
0.95
The context explicitly states the speed of light as "300,000 km/s," so I use that value.


In [9]:
# Same predictor and context, but the question now needs both passages combined
response = qa_types(
    context=[
        "The speed of light is 300,000 km/s.",
        "The speed of sound is 343 m/s.",
    ],
    question="What is the speed of light multiplied by the speed of sound?",
)

In [10]:
# One output field per line: answer, confidence, reasoning
print(response.answer, response.confidence, response.reasoning, sep="\n")

1.029 × 10^11 m^2/s^2 (102,900,000,000 m^2/s^2)
0.95
Convert both speeds to the same units. The speed of light given is 300,000 km/s = 300,000 × 1,000 m/s = 300,000,000 m/s. Multiply by the speed of sound 343 m/s:
300,000,000 m/s × 343 m/s = 102,900,000,000 m^2/s^2 = 1.029 × 10^11 m^2/s^2.


In [11]:
# Class-based form of a typed signature. The docstring is runtime text (dspy uses
# it as the task instructions), and the fields are declared in the order
# inputs first, then outputs.
class QASignature(dspy.Signature):
    """Answer questions based on provided context with reasoning and confidence."""
    
    context: list[str] = dspy.InputField()
    question: str = dspy.InputField()
    reasoning: str = dspy.OutputField()
    answer = dspy.OutputField()  # the only field declared without a type annotation
    confidence: float = dspy.OutputField()

# Use the class-based signature
qa_types_class = dspy.Predict(QASignature)

# Inputs are passed by keyword, matching the InputField names above.
response = qa_types_class(
    context=["The speed of light is 300,000 km/s.", "The speed of sound is 343 m/s."],
    question="What is the speed of light?"
)
# Each OutputField is available as an attribute on the response.
print(response.answer)
print(response.confidence)
print(response.reasoning)


300,000 km/s (approximately 3.0 × 10^8 m/s)
0.95
The provided context explicitly states "The speed of light is 300,000 km/s." Converting units, 300,000 km/s = 300,000,000 m/s = 3.0 × 10^8 m/s.


In [16]:
from pydantic import BaseModel, Field

# Define a Pydantic model for structured output
# The Field descriptions and the ge/le bounds are part of the model definition,
# so `confidence` is constrained to the range [0.0, 1.0].
class AnswerWithConfidence(BaseModel):
    answer: str = Field(description="Clear, concise answer to the question")
    confidence: float = Field(
        description="Confidence score between 0 and 1",
        ge=0.0,
        le=1.0
    )

# NOTE: this reuses the name QASignature and therefore replaces the class of the
# same name defined in an earlier cell. Here the answer and confidence are
# grouped into a single structured `output` field instead of two flat fields.
class QASignature(dspy.Signature):
    """Answer questions based on provided context with reasoning and confidence."""
    
    context: list[str] = dspy.InputField()
    question: str = dspy.InputField()
    reasoning: str = dspy.OutputField()
    output: AnswerWithConfidence = dspy.OutputField()

# Use the class-based signature
qa_types_class = dspy.Predict(QASignature)

response = qa_types_class(
    context=["The speed of light is 300,000 km/s.", "The speed of sound is 343 m/s."],
    question="What is the speed of light?"
)
# `response.output` is read as an AnswerWithConfidence, so its fields are
# accessed as attributes.
print(f"Answer: {response.output.answer}")
print(f"Confidence: {response.output.confidence}")
print(f"Reasoning: {response.reasoning}")


Answer: 300,000 km/s (which is 3.0 × 10^8 m/s).
Confidence: 1.0
Reasoning: The provided context states the speed of light as "300,000 km/s". I'll report that value and include the equivalent in meters per second.


In [19]:
# Build a ChainOfThought module from an inline signature
module = dspy.ChainOfThought("question -> answer")

# Modules are invoked like ordinary functions, with inputs passed by keyword
result = module(question="What is 2 x 450?")

# Outputs are read by name; the result exposes `reasoning` alongside `answer`
print(result.reasoning, result.answer, sep="\n")

Multiply 450 by 2: 450 + 450 = 900.
900


In [None]:
# Hand-written chain-of-thought signature: an explicit `reasoning` output is
# declared before `answer`. No docstring is given on purpose here, so adding
# one would change the instructions sent to the model.
class QAWithCoT(dspy.Signature):
    question = dspy.InputField(desc="The question to answer.")
    reasoning = dspy.OutputField()
    
    answer = dspy.OutputField(desc="The final answer.")

# A plain Predict is enough because the signature already asks for reasoning.
qa_cot = dspy.Predict(QAWithCoT)

response = qa_cot(question="What is 2 x 450?")

print(response.reasoning)
print(response.answer)

# Built-in alternative: ChainOfThought over the plain "question -> answer" signature
qa_cot_inline = dspy.ChainOfThought("question -> answer")

response_inline = qa_cot_inline(question="What is 2 x 450?")

# Print the ChainOfThought result (`response_inline`), not the earlier
# manual-signature `response`, so the two approaches can actually be compared.
print(response_inline.reasoning)
print(response_inline.answer)



Multiply 450 by 2: 450 + 450 = 900.
900


In [34]:
import dspy
from dspy.signatures import ensure_signature

# --- 1. Define a manual signature with explicit reasoning ---
# `reasoning` is declared before `question` here, yet the printed field order
# below is ['question', 'reasoning', 'answer']: inputs are listed before outputs.
# No docstring is given, so dspy falls back to its generated instructions
# ("Given the fields `question`, produce the fields ...").
class QAWithReasoning(dspy.Signature):
    reasoning = dspy.OutputField(
        # structured adapter ignores prefix visually, but CoT sets this:
        prefix="Reasoning: Let's think step by step in order to",
        # slot placeholder, so adapter won't print prose:
        desc="${reasoning}"
    )
    question = dspy.InputField(desc="The question to answer.")
    answer = dspy.OutputField(desc="The final answer.")

# Run the hand-written signature through a plain Predict
qa_manual = dspy.Predict(QAWithReasoning)

print("\n=== Manual CoT Signature ===")
response_manual = qa_manual(question="What is 2 x 450?")
print("Reasoning:", response_manual.reasoning)
print("Answer:", response_manual.answer)

# --- 2. Same question through DSPy's built-in ChainOfThought wrapper ---
qa_auto = dspy.ChainOfThought("question -> answer")

print("\n=== ChainOfThought Wrapper ===")
response_auto = qa_auto(question="What is 2 x 450?")
print("Reasoning:", response_auto.reasoning)
print("Answer:", response_auto.answer)

# --- 3. Compare field names/order across the three signatures ---
base_sig = ensure_signature("question -> answer")
manual_sig = qa_manual.signature
# ChainOfThought keeps its extended signature on the inner `predict` module
auto_sig = qa_auto.predict.signature

print("\n--- Signature Field Order Comparison ---")
print("Base signature fields:", list(base_sig.fields))
print("Manual signature fields:", list(manual_sig.fields))
print("ChainOfThought fields:", list(auto_sig.fields))

# Show the last two LM exchanges (one per predictor above)
dspy.inspect_history(n=2)



=== Manual CoT Signature ===
Reasoning: Multiply 450 by 2: 450 + 450 = 900.
Answer: 900

=== ChainOfThought Wrapper ===
Reasoning: Multiply 450 by 2: 450 + 450 = 900.
Answer: 900

--- Signature Field Order Comparison ---
Base signature fields: ['question', 'answer']
Manual signature fields: ['question', 'reasoning', 'answer']
ChainOfThought fields: ['question', 'reasoning', 'answer']




[34m[2025-10-23T02:01:27.613635][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str): The question to answer.
Your output fields are:
1. `reasoning` (str): 
2. `answer` (str): The final answer.
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## reasoning ## ]]
{reasoning}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `reasoning`, `answer`.


[31mUser message:[0m

[[ ## question ## 

In [35]:
# Tools are ordinary Python functions.
# NOTE(review): the docstring is kept verbatim because dspy appears to use it as
# the tool description shown to the model — confirm before rewording it.
def get_weather(city: str) -> str:
    """Get the current weather for a city."""
    # Canned response; a real implementation would query a weather API here
    report = f"The weather in {city} is sunny and 75°F"
    return report

# Build a ReAct agent that can call get_weather, with max_iters set to 5
react_agent = dspy.ReAct(signature="question -> answer", tools=[get_weather], max_iters=5)

# Ask a question, then show the final answer and the recorded trajectory
result = react_agent(question="What's the weather like in Tokyo?")
print(result.answer)
print("Tool calls made:", result.trajectory)

It's sunny in Tokyo, about 75°F (≈24°C).
Tool calls made: {'thought_0': "I'll fetch the current weather for Tokyo using the get_weather tool.", 'tool_name_0': 'get_weather', 'tool_args_0': {'city': 'Tokyo'}, 'observation_0': 'The weather in Tokyo is sunny and 75°F', 'thought_1': "I have the current weather for Tokyo: sunny and 75°F. I'll finish and provide that as the answer.", 'tool_name_1': 'finish', 'tool_args_1': {}, 'observation_1': 'Completed.'}


In [29]:
# Print the two most recent prompt/response exchanges with the language model.
dspy.inspect_history(n=2)





[34m[2025-10-23T01:44:53.449067][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str): The question to answer.
Your output fields are:
1. `reasoning` (str): 
2. `answer` (str): The final answer.
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## reasoning ## ]]
{reasoning}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `reasoning`, `answer`.


[31mUser message:[0m

[[ ## question ## ]]
What is 2 x 450?

Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## reasoning ## ]]
Multiply 450 by 2: 450 + 450 = 900.

[[ ## answer ## ]]
900

[[ ## completed ## ]][0m





[34m[2025-10-23T01:44:54.926590][0m

[31

In [None]:
import dspy

# Signature for handling tool calls by hand instead of through an agent: the
# available tools are passed in as an input, and the model's requested calls
# come back in the `outputs` field.
class ToolSignature(dspy.Signature):
    """Signature for manual tool handling."""
    question: str = dspy.InputField()
    tools: list[dspy.Tool] = dspy.InputField()
    outputs: dspy.ToolCalls = dspy.OutputField()

def weather(city: str) -> str:
    """Get weather information for a city."""
    # Canned response standing in for a real weather lookup
    report = f"The weather in {city} is sunny"
    return report


# Create tool instances, keyed by name so predicted calls can be dispatched by name
tools = {
    "weather": dspy.Tool(weather),
}

# Create predictor
predictor = dspy.Predict(ToolSignature)

# Make a prediction
response = predictor(
    question="What's the weather in Tokyo?",
    tools=list(tools.values()),
)

# Execute the tool calls.
# Dispatch through the `tools` dict rather than `call.execute()`: that method is
# only available from dspy 3.0.4b2 onwards, while this notebook installs
# dspy 3.0.3. Looking the tool up by name works on both.
for call in response.outputs.tool_calls:
    result = tools[call.name](**call.args)
    print(f"Tool: {call.name}")
    print(f"Args: {call.args}")
    print(f"Result: {result}")

In [8]:
import dspy

# NOTE: this switches the globally configured model to gpt-4o-mini for every
# cell executed after this one.
dspy.configure(lm=dspy.LM(model="gpt-4o-mini"))

# NOTE: reuses the name QASignature, replacing any earlier class of that name.
# The `desc` text is shown to the model next to the field in the system prompt
# (see "`context` (list[str]): Relevant context retrieved from our database"
# in the output below). Fields without an annotation are rendered as `str`.
class QASignature(dspy.Signature):
    """Answer questions based on provided context with reasoning and confidence."""
    context: list[str] = dspy.InputField(desc="Relevant context retrieved from our database")
    question = dspy.InputField()
    reasoning = dspy.OutputField()
    answer = dspy.OutputField()
    confidence: float = dspy.OutputField()

qa_types_class = dspy.Predict(QASignature)

response = qa_types_class(
    context=["The speed of light is 300,000 km/s.", "The speed of sound is 343 m/s."],
    question="What is the speed of light?")

# Print the latest exchange recorded on the configured language model.
dspy.settings.lm.inspect_history()





[34m[2025-10-23T14:32:04.394731][0m

[31mSystem message:[0m

Your input fields are:
1. `context` (list[str]): Relevant context retrieved from our database
2. `question` (str):
Your output fields are:
1. `reasoning` (str): 
2. `answer` (str): 
3. `confidence` (float):
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## reasoning ## ]]
{reasoning}

[[ ## answer ## ]]
{answer}

[[ ## confidence ## ]]
{confidence}        # note: the value you produce must be a single float value

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Answer questions based on provided context with reasoning and confidence.


[31mUser message:[0m

[[ ## context ## ]]
["The speed of light is 300,000 km/s.", "The speed of sound is 343 m/s."]

[[ ## question ## ]]
What is the speed of light?

Respond with the corresponding output fields, starting with the field

In [13]:
import dspy

# Make a simple Q&A signature
# NOTE: reuses the name QASignature, replacing any earlier class of that name.
# No docstring is given, so none is added here: a Signature docstring is sent
# to the model as instructions.
class QASignature(dspy.Signature):
    question = dspy.InputField()
    answer = dspy.OutputField()

# Create a module with a signature
module = dspy.Predict(QASignature)

# The returned Prediction is displayed as the cell output.
module(question="What is the capital of France?")

Prediction(
    answer='The capital of France is Paris.'
)

In [15]:
from typing import Literal

# Add a classifier to identify complex questions
# The Literal annotation restricts `complexity` to the two labels that the
# router module compares against ("simple" / "complex").
class ClassifySignature(dspy.Signature):
    question = dspy.InputField()
    complexity: Literal["simple", "complex"] = dspy.OutputField()


# Router module: classify the question first, then dispatch to a matching predictor
class SmartQA(dspy.Module):
    """Route each question to a plain or chain-of-thought predictor by complexity."""

    def __init__(self):
        super().__init__()  # run dspy.Module's own initialisation first

        # Three sub-modules: a classifier plus one answerer per complexity label
        self.classify = dspy.Predict(ClassifySignature)
        self.simple_qa = dspy.Predict(QASignature)
        self.complex_qa = dspy.ChainOfThought(QASignature)

    def forward(self, question):
        """Classify ``question`` and return the prediction of the chosen answerer."""
        verdict = self.classify(question=question)

        # Anything not labelled "simple" goes through chain-of-thought
        answerer = self.simple_qa if verdict.complexity == "simple" else self.complex_qa
        return answerer(question=question)

# Instantiate the router
smart_qa = SmartQA()

# Calling the module runs its forward() method
response = smart_qa(question="What is the capital of France?")
print(response)

# A longer, multi-part question for comparison with the short one above
smart_response = smart_qa(
    question=(
        "Explain the historical and geopolitical factors that led to Paris becoming the capital of France, "
        "and how this has influenced French centralization."
    )
)
print(smart_response)


Prediction(
    answer='The capital of France is Paris.'
)
Prediction(
    reasoning="Paris became the capital of France due to a combination of historical, geographical, and political factors. Historically, Paris was established as a significant settlement by the Parisii tribe in the 3rd century BC and later became the center of the Frankish kingdom under Clovis I in the 5th century. Its strategic location along the Seine River facilitated trade and communication, making it an attractive site for governance.\n\nGeopolitically, the consolidation of power by the Capetian dynasty in the 10th century further solidified Paris's status as the capital. The Capetians chose Paris as their seat of power, which allowed them to exert control over the surrounding regions. The city's growth was also influenced by its role in the development of the French state, particularly during the medieval period when it became a hub for culture, education, and politics.\n\nThe influence of Paris as the capital

In [None]:
# Raw records: each one has a question, supporting context, and the gold answer
dataset = [
    {
        "question": "What is the capital of France?",
        "context": "France is a country in Western Europe. Its capital is Paris, which is known for its art, fashion, gastronomy and culture.",
        "answer": "Paris",
    },
    {
        "question": "What is the speed of light?",
        "context": "The speed of light in a vacuum is approximately 299,792 kilometers per second, often rounded to 300,000 km/s.",
        "answer": "300,000 kilometers per second",
    },
]

# Turn every record into a dspy.Example; `question` and `context` are marked as
# inputs, which leaves `answer` as the label.
examples = [
    dspy.Example(**item).with_inputs("question", "context")
    for item in dataset
]

[Example({'question': 'What is the capital of France?', 'context': 'France is a country in Western Europe. Its capital is Paris, which is known for its art, fashion, gastronomy and culture.', 'answer': 'Paris'}) (input_keys={'context', 'question'}),
 Example({'question': 'What is the speed of light?', 'context': 'The speed of light in a vacuum is approximately 299,792 kilometers per second, often rounded to 300,000 km/s.', 'answer': '300,000 kilometers per second'}) (input_keys={'context', 'question'})]

In [None]:
# The same two examples written out directly, without an intermediate dataset list
capital_example = dspy.Example(
    question="What is the capital of France?",
    context="France is a country in Western Europe. Its capital is Paris, which is known for its art, fashion, gastronomy and culture.",
    answer="Paris",
).with_inputs("question", "context")

light_speed_example = dspy.Example(
    question="What is the speed of light?",
    context="The speed of light in a vacuum is approximately 299,792 kilometers per second, often rounded to 300,000 km/s.",
    answer="300,000 km/s",
).with_inputs("question", "context")

examples = [capital_example, light_speed_example]


In [None]:
def qa_metric(example, pred, trace=None):
    """Score a prediction by the fraction of gold-answer words it contains.

    Words are compared case-insensitively with punctuation trimmed from both
    ends of each token, so a gold answer of "Paris" matches the prediction
    "The capital of France is Paris.". Punctuation inside a token (as in
    "300,000" or "km/s") is left untouched.

    Args:
        example: Object whose ``answer`` attribute holds the gold answer.
        pred: Object whose ``answer`` attribute holds the predicted answer. A
            missing or ``None`` answer is treated as an empty prediction.
        trace: ``None`` for plain evaluation. Any other value makes the metric
            return a pass/fail boolean instead of a score.

    Returns:
        A float between 0.0 and 1.0 when ``trace`` is ``None``; otherwise
        ``True`` only if the overlap is strictly greater than 0.75. An empty
        gold answer yields 0.0 (or ``False`` when ``trace`` is provided).
    """
    import string  # local import so the function does not rely on another cell

    def _words(text):
        # Lowercase, split on whitespace, trim punctuation from token edges,
        # and drop tokens that consisted of punctuation only.
        raw = "" if text is None else str(text)
        trimmed = (token.strip(string.punctuation) for token in raw.lower().split())
        return [token for token in trimmed if token]

    gold_words = _words(example.answer)
    pred_words = set(_words(getattr(pred, "answer", None)))

    if not gold_words:
        return False if trace is not None else 0.0

    # Count how many words in gold are in pred
    matching_words = sum(1 for word in gold_words if word in pred_words)
    score = matching_words / len(gold_words)

    # Return boolean during optimization if score is > 0.75
    if trace is not None:
        return score > 0.75

    # Otherwise return the score
    return score

# Sanity-check the metric against a prediction that matches the first example's gold answer
exact_prediction = dspy.Prediction(answer="Paris")
score = qa_metric(examples[0], exact_prediction)
print(f"Metric score: {score}")


In [None]:
# Explanation of the difference between trainset, devset (valset), and testset

"""
- **Train set**: The subset of data used to train machine learning models. The model learns patterns and fits parameters using only this data.

- **Dev set / Validation set (valset)**: The subset of data used to tune hyperparameters, choose models, or stop training (early stopping). This set helps you check the model's performance during training and select the best version, but it's not directly used to update the model's parameters.

- **Test set**: The reserved data used to assess the final performance of the model after all training and tuning are complete. This provides an unbiased estimate of the model's ability to generalize to new, unseen data.

Often, "dev set" and "validation set" mean the same thing and are used interchangeably.
"""


In [24]:
import random

# Split: 30% train, 50% val, 20% test
# Shuffle a copy with a fixed seed: the split is then reproducible on re-run,
# and the `examples` list that other cells index into keeps its order.
SPLIT_SEED = 42
shuffled_examples = list(examples)
random.Random(SPLIT_SEED).shuffle(shuffled_examples)

n = len(shuffled_examples)
n_train = int(0.3 * n)
n_val = int(0.5 * n)

trainset = shuffled_examples[:n_train]
valset = shuffled_examples[n_train:n_train + n_val]
testset = shuffled_examples[n_train + n_val:]  # whatever remains after train and val

# int() truncates, so very small datasets can end up with an empty train set
# (2 examples -> 0 train); print the sizes to make that visible.
print("Train set:", len(trainset))
print("Val set:", len(valset))
print("Test set:", len(testset))

Train set: 0
Val set: 1
Test set: 1


In [None]:
import dspy

# Make a simple Q&A signature
# NOTE: reuses the name QASignature, replacing any earlier class of that name.
# No docstring is given, so none is added here: a Signature docstring is sent
# to the model as instructions.
class QASignature(dspy.Signature):
    question = dspy.InputField()
    answer = dspy.OutputField()

# Create a module with a signature
module = dspy.Predict(QASignature)

# Single call as a quick check before scoring the whole dataset.
result = module(question="What is the capital of France?")
print(result.answer)


# Raw records: each one has a question, supporting context, and the gold answer
dataset = [
    {
        "question": "What is the capital of France?",
        "context": "France is a country in Western Europe. Its capital is Paris, which is known for its art, fashion, gastronomy and culture.",
        "answer": "Paris",
    },
    {
        "question": "What is the speed of light?",
        "context": "The speed of light in a vacuum is approximately 299,792 kilometers per second, often rounded to 300,000 km/s.",
        "answer": "300,000 km/s",
    },
]

# Turn every record into a dspy.Example; `question` and `context` are marked as
# inputs, which leaves `answer` as the label.
examples = [
    dspy.Example(**item).with_inputs("question", "context")
    for item in dataset
]

# Run the module on every example and score each prediction with qa_metric.
# Only the question is passed, since this signature has no `context` field.
scores = []
for example in examples:
    result = module(question=example.question)
    scores.append(qa_metric(example, result))

average_score = sum(scores) / len(scores)
print(f"Average score: {average_score}")
