In [1]:
%load_ext autoreload
%autoreload 2

# Initialize notebook

In [2]:
import dspy

In [3]:
# Clear DSPy's global disk cache
# The hasattr guards skip this step when the installed dspy exposes no
# `cache.disk_cache`; "clear" is printed only when the cache was cleared.
# Presumably done so later calls are not answered from entries written by
# earlier runs — confirm against the dspy version in use.
if hasattr(dspy, 'cache') and hasattr(dspy.cache, 'disk_cache'):
    dspy.cache.disk_cache.clear()
    print("clear")

clear


In [4]:
def debug_dspy_prompt(lm):
    """Print the prompt and raw outputs of the most recent call recorded on ``lm``.

    Every message of the last ``lm.history`` entry is printed as its
    upper-cased role followed by its content and a line of 20 ``=`` characters;
    the entry's ``outputs`` value is printed last.

    Args:
        lm: Object exposing a ``history`` sequence whose entries are mappings
            with a ``'messages'`` key (mappings holding ``'role'`` and
            ``'content'``) and an ``'outputs'`` key.

    When ``lm.history`` is empty a short notice is printed instead of raising
    ``IndexError``.
    """
    if not lm.history:
        print("No LM calls recorded in history yet.")
        return

    # Look the entry up once so both loops read the same call.
    last_call = lm.history[-1]
    for message in last_call['messages']:
        print(f"{message['role'].upper()}: \n{message['content']}")
        print("=" * 20)
    print(last_call['outputs'])

In [5]:
# native_lm = dspy.LM(
#     model="ollama/llama3.2-vision:11b",
#     api_base="http://localhost:11434",
#     temperature=0.0
# )

# Fully integrated with DriverLM, using ollama_request_fn, ollama_output_fn

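Note: `ollama_request_fn` and `ollama_output_fn` are just thin wrapper functions. You can plug any LLM into DriverLM by writing your own pair: a request function that calls the model, and an output function that converts the raw reply into the `ModelResponse` and `Usage` formats.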

In [None]:
from package.base import DriverLM, ModelResponse, Usage
import httpx

# Shared httpx client used by ollama_request_fn for every request.
# The 600-second timeout presumably leaves room for slow local generations
# (assumption — confirm it matches the hardware this runs on).
ollama_client = httpx.Client(timeout=600.0)

def ollama_request_fn(prompt: str | None = None, messages: list[dict] | None = None, temperature: float = 0.0, max_tokens: int = 256) -> dict:
    if messages is None:
        messages = [{"role": "user", "content": prompt}]
    
    response = ollama_client.post(
        'http://localhost:11434/api/chat',
        json={
            "model": "llama3.2-vision:11b",
            "messages": messages,
            "stream": False,
            "options": {"temperature": temperature}
        }
    )
    response.raise_for_status()
    return response.json()  # Return full Ollama response

def ollama_output_fn(response: dict) -> ModelResponse:
    """Convert a raw Ollama ``/api/chat`` JSON body into a ``ModelResponse``.

    Args:
        response: Decoded Ollama response. The reply text is read from
            ``response["message"]["content"]`` and the token counters from
            ``prompt_eval_count`` / ``eval_count``; every lookup falls back to
            an empty or zero default when the key is absent.

    Returns:
        A ``ModelResponse`` built from the whitespace-stripped reply text, the
        token usage, and the reported model name (``"custom"`` if absent).
    """
    message = response.get("message", {})
    prompt_tokens = response.get("prompt_eval_count", 0)
    completion_tokens = response.get("eval_count", 0)

    token_usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )

    return ModelResponse.from_text(
        text=message.get("content", "").strip(),
        usage=token_usage,
        model=response.get("model", "custom"),
    )


# Build the LM used by every DSPy module below from the two Ollama helpers:
# ollama_request_fn performs the HTTP call and ollama_output_fn converts its
# JSON result into a ModelResponse.
native_lm = DriverLM(
    request_fn=ollama_request_fn,
    output_fn=ollama_output_fn,
    cache=True  # presumably turns on DriverLM's response cache — confirm in package.base
)

native_lm.clear_cache()  # Clear old cache entries

In [7]:
test = ollama_request_fn("Hi")
ollama_output_fn(test)

ModelResponse(id='chatcmpl-22488cd7-6db0-449d-880c-7019a54bfae4', created=1768722955, model='llama3.2-vision:11b', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='How can I assist you?', role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None, reasoning_content=None))], usage={'prompt_tokens': 10, 'completion_tokens': 7, 'total_tokens': 17}, cache_hit=False)

In [8]:
test

{'model': 'llama3.2-vision:11b',
 'created_at': '2026-01-18T07:55:55.0877443Z',
 'message': {'role': 'assistant', 'content': 'How can I assist you?'},
 'done': True,
 'done_reason': 'stop',
 'total_duration': 490754400,
 'load_duration': 124855000,
 'prompt_eval_count': 10,
 'prompt_eval_duration': 309936900,
 'eval_count': 7,
 'eval_duration': 53454600}

In [9]:
native_lm.history

[]

# Simple QA: question -> answer

In [10]:
# Minimal signature: one string input (`question`) and one string output (`answer`).
# The class docstring and each field's `desc` are runtime text: the prompt printed
# by debug_dspy_prompt below shows the docstring as the objective and the `desc`
# values next to the field names, so editing them changes what the model sees.
class QA(dspy.Signature):
    """Answer questions accurately and concisely."""    
    question: str = dspy.InputField(desc="The question to answer")
    answer: str = dspy.OutputField(desc="A clear, concise answer")

dspy.configure(lm=native_lm)
qa = dspy.Predict(QA)
result = qa(question="What is the capital of Thailand?")
print(result.answer)
result

Bangkok


Prediction(
    answer='Bangkok'
)

In [11]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `question` (str): The question to answer
Your output fields are:
1. `answer` (str): A clear, concise answer
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Answer questions accurately and concisely.
USER: 
[[ ## question ## ]]
What is the capital of Thailand?

Respond with the corresponding output fields, starting with the field `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.
['[[ ## answer ## ]]\nBangkok\n\n[[ ## completed ## ]]']


In [12]:
qa.signature

QA(question -> answer
    instructions='Answer questions accurately and concisely.'
    question = Field(annotation=str required=True json_schema_extra={'desc': 'The question to answer', '__dspy_field_type': 'input', 'prefix': 'Question:'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'A clear, concise answer', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
)

In [13]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': 'Your input fields are:\n1. `question` (str): The question to answer\nYour output fields are:\n1. `answer` (str): A clear, concise answer\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## question ## ]]\n{question}\n\n[[ ## answer ## ]]\n{answer}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Answer questions accurately and concisely.'},
  {'role': 'user',
   'content': '[[ ## question ## ]]\nWhat is the capital of Thailand?\n\nRespond with the corresponding output fields, starting with the field `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.'}],
 'kwargs': {},
 'response': ModelResponse(id='chatcmpl-5f31e137-6ef4-4bf5-a5ca-173bb0c04f58', created=1768722955, model='llama3.2-vision:11b', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=M

# SentimentClassifier with limited options: text -> reason, sentiment

In [14]:
from typing import Literal

# Signature with a constrained output: `sentiment` is a Literal of three labels.
# In the prompt printed below, the Literal becomes a note that the value must
# exactly match one of: positive; neutral; negative.
# `reason` is declared before `sentiment`, and the prompt lists the output
# sections in that same order.
class SentimentClassifier(dspy.Signature):
    """Classify sentiment based from text"""
    text:str = dspy.InputField(desc="Input text")
    reason:str = dspy.OutputField(desc="Reason of classification")
    sentiment:Literal["positive", "neutral", "negative"] = dspy.OutputField(desc="Pick only one: positive, neutral, negative")

demos = [
    dspy.Example(text="I love this pizza!", sentiment="positive").with_inputs("text"),
    dspy.Example(text="This is terrible.", sentiment="negative").with_inputs("text"),
    dspy.Example(text="It's okay.", sentiment="neutral").with_inputs("text"),
]

sc = dspy.Predict(SentimentClassifier)
sc.demos = demos
result = sc(text="I love this pizza!")
print(result.reason)
print(result.sentiment)
result

Positive sentiment due to the use of the word "love".
positive


Prediction(
    reason='Positive sentiment due to the use of the word "love".',
    sentiment='positive'
)

In [15]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `text` (str): Input text
Your output fields are:
1. `reason` (str): Reason of classification
2. `sentiment` (Literal['positive', 'neutral', 'negative']): Pick only one: positive, neutral, negative
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## reason ## ]]
{reason}

[[ ## sentiment ## ]]
{sentiment}        # note: the value you produce must exactly match (no extra characters) one of: positive; neutral; negative

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Classify sentiment based from text
USER: 
This is an example of the task, though some input or output fields are not supplied.

[[ ## text ## ]]
I love this pizza!
ASSISTANT: 
[[ ## reason ## ]]
Not supplied for this particular example. 

[[ ## sentiment ## ]]
positive

[[ ## completed ## ]]

USER: 
This is an example of the task, though some input or output fields are not supplied.


In [16]:
sc

Predict(SentimentClassifier(text -> reason, sentiment
    instructions='Classify sentiment based from text'
    text = Field(annotation=str required=True json_schema_extra={'desc': 'Input text', '__dspy_field_type': 'input', 'prefix': 'Text:'})
    reason = Field(annotation=str required=True json_schema_extra={'desc': 'Reason of classification', '__dspy_field_type': 'output', 'prefix': 'Reason:'})
    sentiment = Field(annotation=Literal['positive', 'neutral', 'negative'] required=True json_schema_extra={'desc': 'Pick only one: positive, neutral, negative', '__dspy_field_type': 'output', 'prefix': 'Sentiment:'})
))

In [17]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': "Your input fields are:\n1. `text` (str): Input text\nYour output fields are:\n1. `reason` (str): Reason of classification\n2. `sentiment` (Literal['positive', 'neutral', 'negative']): Pick only one: positive, neutral, negative\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## reason ## ]]\n{reason}\n\n[[ ## sentiment ## ]]\n{sentiment}        # note: the value you produce must exactly match (no extra characters) one of: positive; neutral; negative\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Classify sentiment based from text"},
  {'role': 'user',
   'content': 'This is an example of the task, though some input or output fields are not supplied.\n\n[[ ## text ## ]]\nI love this pizza!'},
  {'role': 'assistant',
   'content': '[[ ## reason ## ]]\nNot supplied for this particular example. \n\n[[ ## s

# Summarize with two mixed-datatype inputs (str, int): text, max_words -> summary, word_count

In [18]:
# Signature with mixed input types (str, int) and mixed output types (str, int).
# The prompt printed below adds a "must be a single int value" note for `word_count`.
class Summarize(dspy.Signature):
    """Summarize text with specific length."""
    
    text: str = dspy.InputField(desc="Text to summarize")
    max_words: int = dspy.InputField(desc="Maximum words in summary")
    
    summary: str = dspy.OutputField(desc="Concise summary")
    # NOTE(review): word_count is whatever the model reports, not a measured
    # value — the comparison cell further down shows 37 reported vs. 45 from
    # splitting the summary on spaces.
    word_count: int = dspy.OutputField(desc="Actual word count")

summarizer = dspy.Predict(Summarize)

text = """
Artificial intelligence (AI) has revolutionized numerous industries over the past decade. 
From healthcare to finance, AI systems are now capable of performing complex tasks that 
once required human expertise. Machine learning algorithms can analyze vast amounts of data 
to identify patterns and make predictions with remarkable accuracy. In healthcare, AI assists 
doctors in diagnosing diseases by analyzing medical images and patient records. Financial 
institutions use AI for fraud detection and algorithmic trading. However, the rapid advancement 
of AI also raises important ethical questions about privacy, job displacement, and the need 
for responsible AI development. As we continue to integrate AI into our daily lives, it's 
crucial to balance innovation with careful consideration of its societal impact.
"""

result = summarizer(text=text, max_words=50)
print(result.word_count)
print(result.summary)
result

37
Artificial intelligence has revolutionized numerous industries, capable of performing complex tasks with human expertise. Machine learning algorithms analyze vast amounts of data, identifying patterns and making predictions with remarkable accuracy. However, AI's rapid advancement raises important ethical questions about privacy, job displacement, and responsible development.


Prediction(
    summary="Artificial intelligence has revolutionized numerous industries, capable of performing complex tasks with human expertise. Machine learning algorithms analyze vast amounts of data, identifying patterns and making predictions with remarkable accuracy. However, AI's rapid advancement raises important ethical questions about privacy, job displacement, and responsible development.",
    word_count=37
)

In [19]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `text` (str): Text to summarize
2. `max_words` (int): Maximum words in summary
Your output fields are:
1. `summary` (str): Concise summary
2. `word_count` (int): Actual word count
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## max_words ## ]]
{max_words}

[[ ## summary ## ]]
{summary}

[[ ## word_count ## ]]
{word_count}        # note: the value you produce must be a single int value

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Summarize text with specific length.
USER: 
[[ ## text ## ]]

Artificial intelligence (AI) has revolutionized numerous industries over the past decade. 
From healthcare to finance, AI systems are now capable of performing complex tasks that 
once required human expertise. Machine learning algorithms can analyze vast amounts of data 
to identify patterns and make predictions with remarkable accuracy. In healthca

In [20]:
result.word_count, len(result.summary.split(" "))

(37, 45)

In [21]:
summarizer

Predict(Summarize(text, max_words -> summary, word_count
    instructions='Summarize text with specific length.'
    text = Field(annotation=str required=True json_schema_extra={'desc': 'Text to summarize', '__dspy_field_type': 'input', 'prefix': 'Text:'})
    max_words = Field(annotation=int required=True json_schema_extra={'desc': 'Maximum words in summary', '__dspy_field_type': 'input', 'prefix': 'Max Words:'})
    summary = Field(annotation=str required=True json_schema_extra={'desc': 'Concise summary', '__dspy_field_type': 'output', 'prefix': 'Summary:'})
    word_count = Field(annotation=int required=True json_schema_extra={'desc': 'Actual word count', '__dspy_field_type': 'output', 'prefix': 'Word Count:'})
))

In [22]:

native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': 'Your input fields are:\n1. `text` (str): Text to summarize\n2. `max_words` (int): Maximum words in summary\nYour output fields are:\n1. `summary` (str): Concise summary\n2. `word_count` (int): Actual word count\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## max_words ## ]]\n{max_words}\n\n[[ ## summary ## ]]\n{summary}\n\n[[ ## word_count ## ]]\n{word_count}        # note: the value you produce must be a single int value\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Summarize text with specific length.'},
  {'role': 'user',
   'content': "[[ ## text ## ]]\n\nArtificial intelligence (AI) has revolutionized numerous industries over the past decade. \nFrom healthcare to finance, AI systems are now capable of performing complex tasks that \nonce required human expertise. Machine learning algorithms c

# UserInfoExtractor with multiple mixed-datatype outputs (str, str, int, float, float, str): text -> first_name, last_name, age, height, weight, email

In [23]:
# Signature with one string input and six outputs of mixed types (str, int, float).
# The prompt printed below adds "single int value" / "single float value" notes
# for the numeric fields, and the Prediction shown after the call holds them as
# Python numbers (age=32, height=175.5, weight=72.3) rather than strings.
class UserInfoExtractor(dspy.Signature):
    """Extract user's information"""
    text:str = dspy.InputField(desc="Text containing user's information")
    first_name:str = dspy.OutputField(desc="User's first name")
    last_name:str = dspy.OutputField(desc="User's last name")
    age:int = dspy.OutputField(desc="User's age in years")
    height:float = dspy.OutputField(desc="User's height in cm")  # unit is stated only in desc
    weight:float = dspy.OutputField(desc="User's weight in kg")  # unit is stated only in desc
    email:str = dspy.OutputField(desc="User's email address")

uie = dspy.Predict(UserInfoExtractor)

text = """
Patient Registration Form:
Name: Michael Chen
Age: 32 years old
Height: 175.5 cm
Weight: 72.3 kg
Contact: michael.chen@healthmail.com

Please update my medical records with this information.
"""

result = uie(text=text)
result


Prediction(
    first_name='Michael',
    last_name='Chen',
    age=32,
    height=175.5,
    weight=72.3,
    email='michael.chen@healthmail.com'
)

In [24]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `text` (str): Text containing user's information
Your output fields are:
1. `first_name` (str): User's first name
2. `last_name` (str): User's last name
3. `age` (int): User's age in years
4. `height` (float): User's height in cm
5. `weight` (float): User's weight in kg
6. `email` (str): User's email address
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## first_name ## ]]
{first_name}

[[ ## last_name ## ]]
{last_name}

[[ ## age ## ]]
{age}        # note: the value you produce must be a single int value

[[ ## height ## ]]
{height}        # note: the value you produce must be a single float value

[[ ## weight ## ]]
{weight}        # note: the value you produce must be a single float value

[[ ## email ## ]]
{email}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Extract user's information
USER: 
[[ ## text ## ]]

Patient Registration Fo

In [25]:
uie

Predict(UserInfoExtractor(text -> first_name, last_name, age, height, weight, email
    instructions="Extract user's information"
    text = Field(annotation=str required=True json_schema_extra={'desc': "Text containing user's information", '__dspy_field_type': 'input', 'prefix': 'Text:'})
    first_name = Field(annotation=str required=True json_schema_extra={'desc': "User's first name", '__dspy_field_type': 'output', 'prefix': 'First Name:'})
    last_name = Field(annotation=str required=True json_schema_extra={'desc': "User's last name", '__dspy_field_type': 'output', 'prefix': 'Last Name:'})
    age = Field(annotation=int required=True json_schema_extra={'desc': "User's age in years", '__dspy_field_type': 'output', 'prefix': 'Age:'})
    height = Field(annotation=float required=True json_schema_extra={'desc': "User's height in cm", '__dspy_field_type': 'output', 'prefix': 'Height:'})
    weight = Field(annotation=float required=True json_schema_extra={'desc': "User's weight in kg", 

In [26]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': "Your input fields are:\n1. `text` (str): Text containing user's information\nYour output fields are:\n1. `first_name` (str): User's first name\n2. `last_name` (str): User's last name\n3. `age` (int): User's age in years\n4. `height` (float): User's height in cm\n5. `weight` (float): User's weight in kg\n6. `email` (str): User's email address\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## first_name ## ]]\n{first_name}\n\n[[ ## last_name ## ]]\n{last_name}\n\n[[ ## age ## ]]\n{age}        # note: the value you produce must be a single int value\n\n[[ ## height ## ]]\n{height}        # note: the value you produce must be a single float value\n\n[[ ## weight ## ]]\n{weight}        # note: the value you produce must be a single float value\n\n[[ ## email ## ]]\n{email}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective

# DateTimeExtractor: text -> date, time, timezone  

Note: `date`, `time`, and `timezone` are returned as plain strings; convert them into a `datetime` object later in your program.

In [27]:
# Signature whose three outputs are all plain strings. The YYYY-MM-DD and HH:MM
# formats are requested only through the `desc` text; the `str` annotations add
# no type constraint, so validate or parse the values before relying on them.
class DateTimeExtractor(dspy.Signature):
    """Extract datetime information from text"""
    text: str = dspy.InputField(desc="Text containing date/time information")
    date: str = dspy.OutputField(desc="Extracted date in YYYY-MM-DD format")
    time: str = dspy.OutputField(desc="Extracted time in HH:MM format (24-hour)")
    timezone: str = dspy.OutputField(desc="Timezone if mentioned, otherwise 'UTC'")

# Create examples
demos = [
    dspy.Example(text="Meeting on January 15, 2024 at 2:30 PM EST", 
                 date="2024-01-15", time="14:30", timezone="EST").with_inputs("text"),
    dspy.Example(text="Deadline: March 3rd, 2024, 9:00 AM", 
                 date="2024-03-03", time="09:00", timezone="UTC").with_inputs("text"),
]

dte = dspy.Predict(DateTimeExtractor)
dte.demos = demos

# Test
result = dte(text="The conference starts on December 25, 2024 at 10:45 AM PST")
print(f"Date: {result.date}")
print(f"Time: {result.time}")
print(f"Timezone: {result.timezone}")
result


Date: 2024-12-25
Time: 10:45
Timezone: PST


Prediction(
    date='2024-12-25',
    time='10:45',
    timezone='PST'
)

In [28]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `text` (str): Text containing date/time information
Your output fields are:
1. `date` (str): Extracted date in YYYY-MM-DD format
2. `time` (str): Extracted time in HH:MM format (24-hour)
3. `timezone` (str): Timezone if mentioned, otherwise 'UTC'
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## date ## ]]
{date}

[[ ## time ## ]]
{time}

[[ ## timezone ## ]]
{timezone}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Extract datetime information from text
USER: 
[[ ## text ## ]]
Meeting on January 15, 2024 at 2:30 PM EST
ASSISTANT: 
[[ ## date ## ]]
2024-01-15

[[ ## time ## ]]
14:30

[[ ## timezone ## ]]
EST

[[ ## completed ## ]]

USER: 
[[ ## text ## ]]
Deadline: March 3rd, 2024, 9:00 AM
ASSISTANT: 
[[ ## date ## ]]
2024-03-03

[[ ## time ## ]]
09:00

[[ ## timezone ## ]]
UTC

[[ ## completed ## ]]

USER: 
[[ ## text ## ]]
The conference

In [29]:
dte

Predict(DateTimeExtractor(text -> date, time, timezone
    instructions='Extract datetime information from text'
    text = Field(annotation=str required=True json_schema_extra={'desc': 'Text containing date/time information', '__dspy_field_type': 'input', 'prefix': 'Text:'})
    date = Field(annotation=str required=True json_schema_extra={'desc': 'Extracted date in YYYY-MM-DD format', '__dspy_field_type': 'output', 'prefix': 'Date:'})
    time = Field(annotation=str required=True json_schema_extra={'desc': 'Extracted time in HH:MM format (24-hour)', '__dspy_field_type': 'output', 'prefix': 'Time:'})
    timezone = Field(annotation=str required=True json_schema_extra={'desc': "Timezone if mentioned, otherwise 'UTC'", '__dspy_field_type': 'output', 'prefix': 'Timezone:'})
))

In [30]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': "Your input fields are:\n1. `text` (str): Text containing date/time information\nYour output fields are:\n1. `date` (str): Extracted date in YYYY-MM-DD format\n2. `time` (str): Extracted time in HH:MM format (24-hour)\n3. `timezone` (str): Timezone if mentioned, otherwise 'UTC'\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## date ## ]]\n{date}\n\n[[ ## time ## ]]\n{time}\n\n[[ ## timezone ## ]]\n{timezone}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Extract datetime information from text"},
  {'role': 'user',
   'content': '[[ ## text ## ]]\nMeeting on January 15, 2024 at 2:30 PM EST'},
  {'role': 'assistant',
   'content': '[[ ## date ## ]]\n2024-01-15\n\n[[ ## time ## ]]\n14:30\n\n[[ ## timezone ## ]]\nEST\n\n[[ ## completed ## ]]\n'},
  {'role': 'user',
   'content': '[[ ## text ## ]]\nDeadlin

# AppointmentExtractor: text -> appointments (each with title, date, time)

Note: this is an advanced example that nests a Pydantic model inside a `dspy.Signature` to show how the prompt is constructed and how it behaves.

In [31]:
from typing import List
from pydantic import BaseModel, Field

# Pydantic model for one extracted appointment; all three fields are strings.
# The Field descriptions are runtime text: they appear in the JSON schema that
# the prompt printed below attaches to the `appointments` output field.
class Appointment(BaseModel):
    title: str = Field(description="The appointment's title")
    date: str = Field(description="YYYY-MM-DD")  # format requested via description only
    time: str = Field(description="HH:MM 24-hour")  # format requested via description only

# Signature with a nested, structured output: a list of Appointment models.
# The prompt printed below renders the field type as list[Appointment] and adds
# a note that the value must adhere to the Appointment JSON schema.
class AppointmentExtractor(dspy.Signature):
    """Extract appointments"""
    text: str = dspy.InputField(desc="Long input")
    # NOTE(review): desc mentions "datetime", while Appointment exposes separate
    # `date` and `time` fields — consider aligning the wording.
    appointments: List[Appointment] = dspy.OutputField(desc="List of appointments with title, datetime")

# Create examples
demos = [
    dspy.Example(
        text="Meeting on Monday Jan 8 at 2pm and dentist on Tuesday Jan 9 at 10am",
        appointments=[
            Appointment(title="Meeting", date="2024-01-08", time="14:00"),
            Appointment(title="dentist", date="2024-01-09", time="10:00")
        ]
    ).with_inputs("text"),
    
    dspy.Example(
        text="Lunch with Sarah on Friday at noon",
        appointments=[
            Appointment(title="Lunch with Sarah", date="2024-01-12", time="12:00")
        ]
    ).with_inputs("text")
]

ae = dspy.Predict(AppointmentExtractor)
ae.demos = demos

text = """
Hi team, here's my schedule for next week:

I have a dentist appointment on Monday, January 15, 2024 at 10:30 AM.
Then there's the quarterly sales review meeting on Tuesday, January 16, 2024 at 2:00 PM.
Don't forget the team lunch on Wednesday, January 17, 2024 at 12:30 PM.
Finally, I have a client presentation scheduled for Friday, January 19, 2024 at 9:00 AM.
"""

result = ae(text=text)
result


Prediction(
    appointments=[Appointment(title='dentist appointment', date='2024-01-15', time='10:30'), Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'), Appointment(title='team lunch', date='2024-01-17', time='12:30'), Appointment(title='client presentation', date='2024-01-19', time='09:00')]
)

In [32]:
result.appointments

[Appointment(title='dentist appointment', date='2024-01-15', time='10:30'),
 Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'),
 Appointment(title='team lunch', date='2024-01-17', time='12:30'),
 Appointment(title='client presentation', date='2024-01-19', time='09:00')]

In [33]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `text` (str): Long input
Your output fields are:
1. `appointments` (list[Appointment]): List of appointments with title, datetime
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## appointments ## ]]
{appointments}        # note: the value you produce must adhere to the JSON schema: {"type": "array", "$defs": {"Appointment": {"type": "object", "properties": {"date": {"type": "string", "description": "YYYY-MM-DD", "title": "Date"}, "time": {"type": "string", "description": "HH:MM 24-hour", "title": "Time"}, "title": {"type": "string", "description": "The appointment's title", "title": "Title"}}, "required": ["title", "date", "time"], "title": "Appointment"}}, "items": {"$ref": "#/$defs/Appointment"}}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Extract appointments
USER: 
[[ ## text ## ]]
Meeting on Monday Jan 8 at 2pm and dentist on Tuesd

In [34]:
ae

Predict(AppointmentExtractor(text -> appointments
    instructions='Extract appointments'
    text = Field(annotation=str required=True json_schema_extra={'desc': 'Long input', '__dspy_field_type': 'input', 'prefix': 'Text:'})
    appointments = Field(annotation=List[Appointment] required=True json_schema_extra={'desc': 'List of appointments with title, datetime', '__dspy_field_type': 'output', 'prefix': 'Appointments:'})
))

In [35]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': 'Your input fields are:\n1. `text` (str): Long input\nYour output fields are:\n1. `appointments` (list[Appointment]): List of appointments with title, datetime\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## appointments ## ]]\n{appointments}        # note: the value you produce must adhere to the JSON schema: {"type": "array", "$defs": {"Appointment": {"type": "object", "properties": {"date": {"type": "string", "description": "YYYY-MM-DD", "title": "Date"}, "time": {"type": "string", "description": "HH:MM 24-hour", "title": "Time"}, "title": {"type": "string", "description": "The appointment\'s title", "title": "Title"}}, "required": ["title", "date", "time"], "title": "Appointment"}}, "items": {"$ref": "#/$defs/Appointment"}}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Extract appointments'},
 

In [36]:
# Test cache
result = ae(text=text)
result

Prediction(
    appointments=[Appointment(title='dentist appointment', date='2024-01-15', time='10:30'), Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'), Appointment(title='team lunch', date='2024-01-17', time='12:30'), Appointment(title='client presentation', date='2024-01-19', time='09:00')]
)

In [37]:
debug_dspy_prompt(native_lm)

SYSTEM: 
Your input fields are:
1. `text` (str): Long input
Your output fields are:
1. `appointments` (list[Appointment]): List of appointments with title, datetime
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## appointments ## ]]
{appointments}        # note: the value you produce must adhere to the JSON schema: {"type": "array", "$defs": {"Appointment": {"type": "object", "properties": {"date": {"type": "string", "description": "YYYY-MM-DD", "title": "Date"}, "time": {"type": "string", "description": "HH:MM 24-hour", "title": "Time"}, "title": {"type": "string", "description": "The appointment's title", "title": "Title"}}, "required": ["title", "date", "time"], "title": "Appointment"}}, "items": {"$ref": "#/$defs/Appointment"}}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Extract appointments
USER: 
[[ ## text ## ]]
Meeting on Monday Jan 8 at 2pm and dentist on Tuesd

In [38]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': 'Your input fields are:\n1. `text` (str): Long input\nYour output fields are:\n1. `appointments` (list[Appointment]): List of appointments with title, datetime\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## appointments ## ]]\n{appointments}        # note: the value you produce must adhere to the JSON schema: {"type": "array", "$defs": {"Appointment": {"type": "object", "properties": {"date": {"type": "string", "description": "YYYY-MM-DD", "title": "Date"}, "time": {"type": "string", "description": "HH:MM 24-hour", "title": "Time"}, "title": {"type": "string", "description": "The appointment\'s title", "title": "Title"}}, "required": ["title", "date", "time"], "title": "Appointment"}}, "items": {"$ref": "#/$defs/Appointment"}}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Extract appointments'},
 

# This is how dspy conceptually parses output into a well-structured format

In [39]:
import re

def parse_field(text, field_name):
    """Extract the raw value of one ``[[ ## field_name ## ]]`` section.

    A section starts at the header for ``field_name`` and runs until the next
    ``[[ ## ... ## ]]`` header or, when no header follows, the end of ``text``.

    Args:
        text: Raw LM output containing ``[[ ## name ## ]]`` section headers.
        field_name: Name of the field to extract; matched literally.

    Returns:
        The section content with surrounding whitespace stripped (possibly an
        empty string), or ``None`` when ``text`` has no header for
        ``field_name``.
    """
    # Escape the name so characters such as "." cannot act as regex operators.
    header = rf'\[\[\s*##\s*{re.escape(field_name)}\s*##\s*\]\]'
    # Stop at any following header or at end of text. The lookahead keeps an
    # empty field from swallowing the next section, and lets the last field be
    # read even if the closing `completed` marker is missing.
    section_end = r'(?=\[\[\s*##\s*\w+\s*##\s*\]\]|\Z)'
    match = re.search(rf'{header}(.*?){section_end}', text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None

In [40]:
import json
from typing import get_origin, get_args

# Raw completion text of the most recent LM call. It gets its own name so the
# `text` input used by the extractor cells above is not overwritten, which
# keeps those cells re-runnable after this one.
lm_output = native_lm.history[-1]['outputs'][0]
signature = ae.signature

# Maps each output field name to its parsed, typed value.
_result = {}
for field_name, field in signature.output_fields.items():
    raw_value = parse_field(lm_output, field_name)
    if raw_value is None:
        # Give a clear error instead of a TypeError from json.loads(None)/int(None).
        raise ValueError(f"Output field '{field_name}' not found in LM output")
    field_type = field.annotation

    # get_origin returns list for List[X] / list[X], and None for plain types.
    origin = get_origin(field_type)

    if origin is list:
        # Inner type, e.g. Appointment from List[Appointment]; None for a bare List.
        type_args = get_args(field_type)
        inner_type = type_args[0] if type_args else None
        parsed_list = json.loads(raw_value)

        if hasattr(inner_type, 'model_validate'):
            # Pydantic model items: validate each decoded JSON item.
            _result[field_name] = [inner_type.model_validate(item) for item in parsed_list]
        else:
            _result[field_name] = parsed_list

    elif hasattr(field_type, 'model_validate'):
        # Single Pydantic model
        _result[field_name] = field_type.model_validate(json.loads(raw_value))

    elif field_type is int:
        _result[field_name] = int(raw_value)
    elif field_type is float:
        _result[field_name] = float(raw_value)
    else:
        # Anything else (e.g. str) is kept as the raw section text.
        _result[field_name] = raw_value

    print(f"{field_name} ({field_type}): {_result[field_name]}")


appointments (typing.List[__main__.Appointment]): [Appointment(title='dentist appointment', date='2024-01-15', time='10:30'), Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'), Appointment(title='team lunch', date='2024-01-17', time='12:30'), Appointment(title='client presentation', date='2024-01-19', time='09:00')]


In [41]:
hasattr(inner_type, "model_validate")

True

In [42]:
_result

{'appointments': [Appointment(title='dentist appointment', date='2024-01-15', time='10:30'),
  Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'),
  Appointment(title='team lunch', date='2024-01-17', time='12:30'),
  Appointment(title='client presentation', date='2024-01-19', time='09:00')]}

# This is how dspy creates a result class with dot-notation access

In [43]:
# Simplified version of dspy.Prediction
class Prediction(dict):
    """Dict whose keys can also be read, written and deleted as attributes.

    ``Prediction(answer="Bangkok").answer`` returns ``"Bangkok"``; assigning
    ``pred.answer = ...`` stores under the key ``"answer"``; ``del pred.answer``
    removes it. Accessing or deleting a name that is not a key raises
    ``AttributeError``.
    """

    def __init__(self, **kwargs):
        # dict.__init__ already stores every keyword argument as a key.
        super().__init__(**kwargs)

    def __getattr__(self, key):
        # Enable dot notation: result.answer
        # Only reached when normal attribute lookup fails, so dict methods
        # such as .items() keep working.
        try:
            return self[key]
        except KeyError:
            # `from None` hides the internal KeyError from the traceback.
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'") from None

    def __setattr__(self, key, value):
        # Enable setting: result.answer = "Bangkok"
        self[key] = value

    def __delattr__(self, key):
        # Enable deleting: del result.answer (mirrors __getattr__/__setattr__)
        try:
            del self[key]
        except KeyError:
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'") from None

    def __repr__(self):
        items = ', \n'.join(f"\t{k}={v!r}" for k, v in self.items())
        return f"Prediction(\n{items}\n)"

# Usage
test = Prediction(**_result)
test


Prediction(
	appointments=[Appointment(title='dentist appointment', date='2024-01-15', time='10:30'), Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'), Appointment(title='team lunch', date='2024-01-17', time='12:30'), Appointment(title='client presentation', date='2024-01-19', time='09:00')]
)

In [44]:
test.appointments

[Appointment(title='dentist appointment', date='2024-01-15', time='10:30'),
 Appointment(title='quarterly sales review meeting', date='2024-01-16', time='14:00'),
 Appointment(title='team lunch', date='2024-01-17', time='12:30'),
 Appointment(title='client presentation', date='2024-01-19', time='09:00')]

In [45]:
native_lm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': 'Your input fields are:\n1. `text` (str): Long input\nYour output fields are:\n1. `appointments` (list[Appointment]): List of appointments with title, datetime\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## text ## ]]\n{text}\n\n[[ ## appointments ## ]]\n{appointments}        # note: the value you produce must adhere to the JSON schema: {"type": "array", "$defs": {"Appointment": {"type": "object", "properties": {"date": {"type": "string", "description": "YYYY-MM-DD", "title": "Date"}, "time": {"type": "string", "description": "HH:MM 24-hour", "title": "Time"}, "title": {"type": "string", "description": "The appointment\'s title", "title": "Title"}}, "required": ["title", "date", "time"], "title": "Appointment"}}, "items": {"$ref": "#/$defs/Appointment"}}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Extract appointments'},
 