In [1]:
# 1. Install DSPy
!pip install dspy-ai pydantic openai

Collecting dspy-ai
  Downloading dspy_ai-3.0.4-py3-none-any.whl.metadata (285 bytes)
Collecting dspy>=3.0.4 (from dspy-ai)
  Downloading dspy-3.0.4-py3-none-any.whl.metadata (8.4 kB)
Collecting backoff>=2.2 (from dspy>=3.0.4->dspy-ai)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting optuna>=3.4.0 (from dspy>=3.0.4->dspy-ai)
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting magicattr>=0.1.6 (from dspy>=3.0.4->dspy-ai)
  Downloading magicattr-0.1.6-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting litellm>=1.64.0 (from dspy>=3.0.4->dspy-ai)
  Downloading litellm-1.80.5-py3-none-any.whl.metadata (30 kB)
Collecting diskcache>=5.6.0 (from dspy>=3.0.4->dspy-ai)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting json-repair>=0.30.0 (from dspy>=3.0.4->dspy-ai)
  Downloading json_repair-0.54.1-py3-none-any.whl.metadata (12 kB)
Collecting asyncer==0.0.8 (from dspy>=3.0.4->dspy-ai)
  Downloading asyncer-0.0.8-py3-none-any.whl.m

In [None]:
# ----------------------------------------------------------------
import dspy
import os
from pydantic import BaseModel, Field
from typing import Literal, List

# Task model used for every prediction in this notebook.
# The API key is read from the OPENAI_API_KEY environment variable so that no
# secret is ever stored in the notebook file or its saved outputs. A missing
# variable raises KeyError here, before any request is sent.
lm = dspy.LM(
    "openai/gpt-4.1-2025-04-14",
    api_key=os.environ["OPENAI_API_KEY"],
)

# Register `lm` as the default language model for DSPy modules.
dspy.configure(lm=lm)

# ----------------------------------------------------------------

In [3]:
# 3. DEFINE CLASSES

# Standard Pydantic model for the data structure.
# One record describing a place; two of these are compared by PlaceMatcher.
class Place(BaseModel):
    # Note: Using standard Pydantic Field here for compatibility
    # Free-form address string; the training data below uses anything from a
    # bare city name to a full street address with suite number.
    address: str = Field(description="The physical address")
    # Display name of the place or business.
    name: str = Field(description="The name of the place")

# The DSPy Signature: declares the inputs and outputs of the matching task.
# The class docstring is runtime text, not just documentation: the optimizer
# log lists it verbatim as instruction candidate 0, so do not edit it casually.
class PlaceMatcher(dspy.Signature):
    """Review two places and determine if they are the same place in the real world."""

    # Input fields: the two place records to compare.
    place_one: Place = dspy.InputField()
    place_two: Place = dspy.InputField()

    # Output fields
    # `match` is the label the training examples and the metric compare against.
    match: bool = dspy.OutputField(desc="Do the two places refer to the same place?")
    # Self-reported confidence, restricted to three levels; it is not scored
    # by the metric used in this notebook.
    match_confidence: Literal["low", "medium", "high"] = dspy.OutputField()

# Instantiate the predictor.
# This is the unoptimized baseline program; it is later passed to the
# optimizer's compile step.
# NOTE(review): older tutorials recommend a separate `TypedPredictor` for typed
# outputs; as far as I know it is no longer shipped in DSPy 3.x and
# `dspy.Predict` handles the typed fields itself — TODO confirm.
matcher = dspy.Predict(PlaceMatcher)


In [4]:
# ----------------------------------------------------------------
# 4. CREATE THE TRAINSET (The missing piece)

# We need examples that have inputs (place_one, place_two) and the label (match).
# Each entry is a 3-tuple: (first Place, second Place, expected match as bool).
# The set is deliberately small and made of hard cases: 3 matches, 4 non-matches.
train_data = [
    # --- TRICKY MATCHES (True) ---
    # Typos and massive abbreviations
    (Place(name="Starbux", address="123 Main St"), Place(name="Starbucks Coffee", address="123 Main Street"), True),
    # Suite number differences that don't matter
    (Place(name="Oracle", address="500 Oracle Pkwy"), Place(name="Oracle Corp", address="500 Oracle Pkwy, #100"), True),
    # Historical name vs Current name
    (Place(name="Sears Tower", address="Chicago"), Place(name="Willis Tower", address="233 S Wacker Dr, Chicago, IL"), True),

    # --- HARD NON-MATCHES (False) ---
    # Same address, different suite (Different businesses)
    (Place(name="Dr. Smith Dentistry", address="100 Medical Dr, Suite A"), Place(name="Dr. Jones Ortho", address="100 Medical Dr, Suite B"), False),
    # Same name, same city, different street (The "Chain" problem)
    (Place(name="Chipotle", address="5th Avenue, NY"), Place(name="Chipotle", address="8th Avenue, NY"), False),
    # Airport Terminals (Often confused as same place, but usually distinct POIs)
    (Place(name="JFK Terminal 4", address="Queens, NY"), Place(name="JFK Terminal 1", address="Queens, NY"), False),
    # The "Building vs Business" trap
    (Place(name="Empire State Building", address="350 5th Ave"), Place(name="LinkedIn Office", address="350 5th Ave"), False),
]

# Wrap each (first place, second place, label) triple in a dspy.Example and
# mark the two place fields as inputs; `match` is left as the label.
trainset = [
    dspy.Example(place_one=first, place_two=second, match=label).with_inputs(
        "place_one", "place_two"
    )
    for first, second, label in train_data
]

print(f"Created trainset with {len(trainset)} examples.")

# ----------------------------------------------------------------

Created trainset with 7 examples.


In [8]:
# 5. OPTIMIZATION

def validate_match(example, pred, trace=None):
    """Exact-match metric: compare the labelled and predicted `match` values.

    Args:
        example: object carrying the expected label in its `match` attribute.
        pred: object carrying the model's answer in its `match` attribute.
        trace: accepted for compatibility with the optimizer's metric call;
            not used here.

    Returns:
        The result of `example.match == pred.match`.
    """
    expected = example.match
    predicted = pred.match
    return expected == predicted

from dspy.teleprompt import MIPROv2

# Define the prompter model (usually a strong model like GPT-4o); it proposes
# candidate instructions while the task model `lm` runs the program.
# SECURITY: the key is read from the OPENAI_API_KEY environment variable and
# must never be pasted into the notebook. Any key that was previously committed
# in this cell has to be treated as leaked and revoked in the OpenAI dashboard.
prompt_lm = dspy.LM(
    "openai/gpt-4.1-2025-04-14",
    api_key=os.environ["OPENAI_API_KEY"],
)

print("Starting optimization... (this may take a minute)")

# Initialize MIPROv2 with the exact-match metric. `auto="light"` selects the
# optimizer's smallest preset (the run settings are printed in the log below).
tp = MIPROv2(
    metric=validate_match,
    auto="light",
    prompt_model=prompt_lm,
    task_model=lm,
)

# Run the compilation: searches over instruction / few-shot combinations and
# returns the best-scoring version of `matcher`.
optimized_matcher = tp.compile(matcher, trainset=trainset)

# Save the result so the optimized program can be reloaded without re-running
# the (paid) optimization.
optimized_matcher.save("optimized_matcher_full_light.json")
print("Optimization complete. Program saved.")

# ----------------------------------------------------------------

2025/11/24 20:16:06 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 5

2025/11/24 20:16:06 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/11/24 20:16:06 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/11/24 20:16:06 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Starting optimization... (this may take a minute)
Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


100%|██████████| 2/2 [00:00<00:00, 45.69it/s]


Bootstrapped 2 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 4/6


100%|██████████| 2/2 [00:00<00:00, 51.92it/s]


Bootstrapped 2 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/6


 50%|█████     | 1/2 [00:00<00:00, 50.43it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/6


 50%|█████     | 1/2 [00:00<00:00, 46.62it/s]
2025/11/24 20:16:06 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/11/24 20:16:06 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.


2025/11/24 20:16:14 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/11/24 20:16:48 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/11/24 20:16:48 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Review two places and determine if they are the same place in the real world.

2025/11/24 20:16:48 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given two place records, each containing an address and a business name (which may include abbreviations, minor spelling errors, or differing formats), analyze the provided information and decide whether both entries refer to the same real-world location. In your decision, take into account variations in spelling, common abbreviations, presence of suite or unit numbers, and alternate forms of business names. Output whether the records match (True or False) and provide a confidence rating (e.g., high, medium, or low) along with a brief rationale supporting your confidence level.

2025/11/24 20

Average Metric: 4.00 / 5 (80.0%): 100%|██████████| 5/5 [00:01<00:00,  4.03it/s]

2025/11/24 20:16:49 INFO dspy.evaluate.evaluate: Average Metric: 4 / 5 (80.0%)
2025/11/24 20:16:49 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 80.0

2025/11/24 20:16:50 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:01<00:00,  3.08it/s]

2025/11/24 20:16:51 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:51 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 100.0
2025/11/24 20:16:51 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/11/24 20:16:51 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0]
2025/11/24 20:16:51 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:51 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 10 =====



Average Metric: 4.00 / 5 (80.0%): 100%|██████████| 5/5 [00:00<00:00,  7.33it/s]

2025/11/24 20:16:52 INFO dspy.evaluate.evaluate: Average Metric: 4 / 5 (80.0%)
2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0]
2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:00<00:00, 10.01it/s]

2025/11/24 20:16:52 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0]
2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:52 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:00<00:00,  6.26it/s]

2025/11/24 20:16:53 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/11/24 20:16:53 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0]
2025/11/24 20:16:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:53 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 10 =====



Average Metric: 4.00 / 5 (80.0%): 100%|██████████| 5/5 [00:00<00:00,  5.05it/s]

2025/11/24 20:16:54 INFO dspy.evaluate.evaluate: Average Metric: 4 / 5 (80.0%)
2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0, 80.0]
2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 10 =====



Average Metric: 4.00 / 5 (80.0%): 100%|██████████| 5/5 [00:00<00:00, 713.56it/s]

2025/11/24 20:16:54 INFO dspy.evaluate.evaluate: Average Metric: 4 / 5 (80.0%)
2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0, 80.0, 80.0]
2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:01<00:00,  3.42it/s]

2025/11/24 20:16:56 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0, 80.0, 80.0, 100.0]
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:00<00:00, 611.59it/s]

2025/11/24 20:16:56 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0, 80.0, 80.0, 100.0, 100.0]
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:00<00:00, 1131.94it/s]

2025/11/24 20:16:56 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0, 80.0, 80.0, 100.0, 100.0, 100.0]
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 10 =====



Average Metric: 5.00 / 5 (100.0%): 100%|██████████| 5/5 [00:00<00:00, 824.13it/s]

2025/11/24 20:16:56 INFO dspy.evaluate.evaluate: Average Metric: 5 / 5 (100.0%)
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [80.0, 100.0, 80.0, 100.0, 100.0, 80.0, 80.0, 100.0, 100.0, 100.0, 100.0]
2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/11/24 20:16:56 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 100.0!



Optimization complete. Program saved.


In [9]:
# 6. TEST RUN
print("\n--- Test Run on New Data ---")
test_p1 = Place(name="Louvre", address="Paris")
test_p2 = Place(name="Musee du Louvre", address="Rue de Rivoli, Paris")

# Ask the optimized program about a pair that is not in the training data.
result = optimized_matcher(place_one=test_p1, place_two=test_p2)

# Report both input names followed by the predicted label and confidence.
print(
    f"Input 1: {test_p1.name}",
    f"Input 2: {test_p2.name}",
    f"Match: {result.match}",
    f"Confidence: {result.match_confidence}",
    sep="\n",
)


--- Test Run on New Data ---
Input 1: Louvre
Input 2: Musee du Louvre
Match: True
Confidence: high
