# AI as a Judge for Prompt Modification
Use AI as a judge to modify a prompt to navigate it through a 'maze'

In [42]:
import sys
import os
import dspy 

import dspy 
from pydantic import BaseModel, Field, StrictInt, validator
from typing import Literal

from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

settings = MySettings().get()

Getting keys from environment variables


In [43]:
lm = dspy.LM(
    'gpt-4.1', 
    model_type='chat', 
    cache=False, 
    api_key=settings.OPENAI_API_KEY,
    temperature=0.8     
)

dspy.configure(lm=lm)

In [None]:
class Widget(BaseModel):
    shape: Literal["round", "square", "triangle"] = dspy.OutputField()
    weightInKg: StrictInt = dspy.OutputField(desc="Must be postive integer")
    colour: str = dspy.OutputField(desc="Colour of the widget")

class WidgetOrder(dspy.Signature):
    """
    Follow the `order_placement_instructions` to place an order for a widget. 
    Double check that the created widget meets the instructions exactly.
    Go through all the details carefully to ensure accuracy and look at each property and double check the values match the rules
    """

    order_placement_instructions: str = dspy.InputField()

    widget: Widget = dspy.OutputField()
    
class WidgetOrderInstructionCreator(dspy.Signature):
    """
    You are an expert at creating orders for widgets. You use previously provided feedback and widgets order attempts to improve the order placement instructions.
    Create detailed `order_placement_instructions` for placing an order for a widget based on the original widget details and instructions.
    Only use the properties of the `previous_widget` and the `order_validator_feedback` to improve the order placement instructions.
    Keep the instructions as concise as possible and to the point.
    """

    order_instructions: str = dspy.InputField(desc="These are the instructions to use in order to create the order.")
    previous_widget: Widget = dspy.InputField(desc="This was the previous widget that was placed in the order and was rejected. If it is None then this is the first attempt to place an order.")
    order_validator_feedback: list[str] = dspy.InputField(desc="These feedback from the order validator about why the order was rejected previously, if applicable.")

    order_placement_instructions: str = dspy.OutputField()

################################################################
# Function to place order, it has the validation rules in it

def place_order(order: WidgetOrder) -> str:
    """
    Places orders for widgets. Returns 'Valid' if the order is valid, otherwise returns a string with the reason for rejection.

    Return: 
    - str: Returns 'Valid' if the order is valid, otherwise returns a string with the reason for rejection.
    """

    # Rule 1: 
    if order.widget.shape.lower() == "round" and order.widget.weightInKg <= 5:
        msg = f"""**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`={order.widget.weightInKg} 
        Try again with a different shape or weight."""
        md(msg)
        return msg
    
    # Rule 2:
    dividor = 4
    if (order.widget.weightInKg % dividor) != 0:
        msg = f"**Order rejected**: Widget 'weightInKg' is {order.widget.weightInKg}kg, this must divisible by {dividor}."
        md(msg)
        return msg
    
    # Rule 3: 
    if order.widget.colour.lower() == "red":
        msg = "**Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour."
        md(msg)
        return msg

    return "Valid"

/tmp/ipykernel_35665/2363346066.py:6: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  @validator("weightInKg")


In [55]:
create_order = dspy.ChainOfThought(WidgetOrder)
create_order_instructions = dspy.ChainOfThought(WidgetOrderInstructionCreator)

################################################################
# Place order 

loopCount = 0

instructions = "Create a red widget that is round and 3kg in weight." # Will hit the first rule
#instructions = "Create a widget that is a triangle and 3kg in weight." # Miss first rule and hit divisor rule
widget = None
order_validator_feedback = []

md("**Original instructions**: ", instructions)

while loopCount < 5:
    md("# loopCount: ", loopCount)
    
    ##########################################################
    md("## Create order instructions ")
    md('instructions: ', instructions)
    md('order_validator_feedback: ', order_validator_feedback)
    order_instructions = create_order_instructions(previous_widget=widget, order_instructions=instructions, order_validator_feedback=order_validator_feedback)
    md("order_instructions created: ", order_instructions)
    instructions = order_instructions.order_placement_instructions
    md("**Order instructions**: ", instructions)
    
    ##########################################################
    md("## Create order object")
    
    md("order_placement_instructions: ", instructions)
    order = create_order(order_placement_instructions=instructions)
    md("**Order object created**: ", order)
    md('order.widget: ***', order.widget, '***')
    
    ##########################################################
    md("## Place order & validate")
    response = place_order(order=order)
    
    if response == "Valid":
        md("", "### Order placed successfully")
        break
    
    order_validator_feedback.append(response)
    md("**Cumulative rejection reasons**: ")
    msgs = []
    for i in range(len(order_validator_feedback)):
        md(f"* ***{i+1}***: {order_validator_feedback[i]}")
        

    loopCount += 1

**Original instructions**: Create a red widget that is round and 3kg in weight.

# loopCount: 0

## Create order instructions 

instructions: Create a red widget that is round and 3kg in weight.

order_validator_feedback: []

order_instructions created: Prediction(  
    reasoning='This is the first attempt at placing the order, so there is no previous widget or validator feedback to consider. The task is to create a widget that is red in color, round in shape, and weighs 3kg.',  
    order_placement_instructions='Order a widget with the following specifications: color—red, shape—round, weight—3kg.'  
)

**Order instructions**: Order a widget with the following specifications: color—red, shape—round, weight—3kg.

## Create order object

order_placement_instructions: Order a widget with the following specifications: color—red, shape—round, weight—3kg.

**Order object created**: Prediction(  
    reasoning='The order placement instructions specify a widget with the following characteristics:\n- Colour: red\n- Shape: round\n- Weight: 3kg\n\nI will double-check each property:\n- Colour is "red", which matches the instruction.\n- Shape is "round", which is one of the valid enum values ("round", "square", "triangle").\n- Weight is 3 (as an integer), which matches "3kg" and is a positive integer.\n\nAll specifications match the requirements.',  
    widget=Widget(shape='round', weightInKg=3, colour='red')  
)

order.widget: ***shape='round' weightInKg=3 colour='red'***

## Place order & validate

**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight.

**Cumulative rejection reasons**: 

* ***1***: **Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight.

# loopCount: 1

## Create order instructions 

instructions: Order a widget with the following specifications: color—red, shape—round, weight—3kg.

order_validator_feedback: ['**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3 \n        Try again with a different shape or weight.']

order_instructions created: Prediction(  
    reasoning='The order was rejected because round widgets must weigh more than 5kg, but the requested weight was only 3kg. To comply with the requirements, I need to either increase the weight above 5kg or select a different shape. Since the original instructions specify a round shape, I will adjust the weight to 6kg to fulfill both requirements.',  
    order_placement_instructions='Order a widget with the following specifications: color—red, shape—round, weight—6kg.'  
)

**Order instructions**: Order a widget with the following specifications: color—red, shape—round, weight—6kg.

## Create order object

order_placement_instructions: Order a widget with the following specifications: color—red, shape—round, weight—6kg.

**Order object created**: Prediction(  
    reasoning='The order placement instructions specify three characteristics for the widget: the color must be red, the shape must be round, and the weight must be 6kg. I will ensure each property matches exactly with the given requirements and double check for accuracy:\n- Colour: "red" (matches the instruction)\n- Shape: "round" (matches one of the allowed enum values and the instruction)\n- Weight: 6 (positive integer, matches the required value in kg)\nAll criteria are met.',  
    widget=Widget(shape='round', weightInKg=6, colour='red')  
)

order.widget: ***shape='round' weightInKg=6 colour='red'***

## Place order & validate

**Order rejected**: Widget weight is 6kg, this must divisible by 4.

**Cumulative rejection reasons**: 

* ***1***: **Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight.

* ***2***: **Order rejected**: Widget weight is 6kg, this must divisible by 4.

# loopCount: 2

## Create order instructions 

instructions: Order a widget with the following specifications: color—red, shape—round, weight—6kg.

order_validator_feedback: ['**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3 \n        Try again with a different shape or weight.', '**Order rejected**: Widget weight is 6kg, this must divisible by 4.']

order_instructions created: Prediction(  
    reasoning='The order was rejected twice: first because the weight for a round widget was set to 3kg, which is not more than 5kg (the new weight of 6kg should satisfy this), and second because 6kg is not divisible by 4. Therefore, for a valid order, the weight must be both greater than 5kg and divisible by 4. The smallest weight that satisfies both is 8kg.',  
    order_placement_instructions='Order a widget with the following specifications: color—red, shape—round, weight—8kg.'  
)

**Order instructions**: Order a widget with the following specifications: color—red, shape—round, weight—8kg.

## Create order object

order_placement_instructions: Order a widget with the following specifications: color—red, shape—round, weight—8kg.

**Order object created**: Prediction(  
    reasoning="The order specifies that the widget should be red in color, have a round shape, and weigh 8kg. I will set the 'colour' as 'red', 'shape' as 'round', and 'weightInKg' as 8. I have double-checked to ensure all specifications are adhered to exactly as requested.",  
    widget=Widget(shape='round', weightInKg=8, colour='red')  
)

order.widget: ***shape='round' weightInKg=8 colour='red'***

## Place order & validate

**Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour.

**Cumulative rejection reasons**: 

* ***1***: **Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight.

* ***2***: **Order rejected**: Widget weight is 6kg, this must divisible by 4.

* ***3***: **Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour.

# loopCount: 3

## Create order instructions 

instructions: Order a widget with the following specifications: color—red, shape—round, weight—8kg.

order_validator_feedback: ['**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3 \n        Try again with a different shape or weight.', '**Order rejected**: Widget weight is 6kg, this must divisible by 4.', "**Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour."]

order_instructions created: Prediction(  
    reasoning='The order validator feedback points out three key problems: (1) round widgets cannot weigh less than or equal to 5kg, (2) the weight must be divisible by 4, and (3) red widgets are currently not available. To create an acceptable order, I need to select a color other than red, keep the shape as round, and set the weight to greater than 5kg and divisible by 4. The smallest such weight is 8kg.',  
    order_placement_instructions='Order a widget with the following specifications: color—blue, shape—round, weight—8kg.'  
)

**Order instructions**: Order a widget with the following specifications: color—blue, shape—round, weight—8kg.

## Create order object

order_placement_instructions: Order a widget with the following specifications: color—blue, shape—round, weight—8kg.

**Order object created**: Prediction(  
    reasoning='The instructions specify a widget with color blue, shape round, and weight 8kg. I will ensure that the color is set to "blue", the shape to "round" (which is an allowed enum value), and the weight as a positive integer 8. All values match the requirements and conform to the schema.',  
    widget=Widget(shape='round', weightInKg=8, colour='blue')  
)

order.widget: ***shape='round' weightInKg=8 colour='blue'***

## Place order & validate



### Order placed successfully