# AI as a Judge for Prompt Modification
Use AI as a judge to modify a prompt to navigate it through a 'maze'

In [42]:
import sys
import os
import dspy 

import dspy 
from pydantic import BaseModel, Field, StrictInt, validator
from typing import Literal

from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

settings = MySettings().get()

Getting keys from environment variables


In [43]:
lm = dspy.LM(
    'gpt-4.1', 
    model_type='chat', 
    cache=False, 
    api_key=settings.OPENAI_API_KEY,
    temperature=0.8     
)

dspy.configure(lm=lm)

In [44]:
class Widget(BaseModel):
    shape: Literal["round", "square", "triangle"] = dspy.OutputField()
    weightInKg: StrictInt = dspy.OutputField(desc="Must be postive integer")
    colour: str = dspy.OutputField(desc="Colour of the widget")
    
    @validator("weightInKg")
    def must_be_positive(cls, v):
        if v <= 0:
            raise ValueError("weightInKg must be > 0")
        return v

class WidgetOrder(dspy.Signature):
    """
    Follow the `order_placement_instructions` to place an order for a widget. 
    Double check that the created widget meets the instructions exactly.
    Go through all the details carefully to ensure accuracy and look at each property and double check the values match the rules
    """

    order_placement_instructions: str = dspy.InputField()

    widget: Widget = dspy.OutputField()
    
class WidgetOrderInstructionCreator(dspy.Signature):
    """
    You are an expert at creating orders for widgets. You use previously provided feedback and widgets order attempts to improve the order placement instructions.
    Create detailed `order_placement_instructions` for placing an order for a widget based on the original widget details and instructions.
    Only use the properties of the `previous_widget` and the `order_validator_feedback` to improve the order placement instructions.
    Keep the instructions as concise as possible and to the point.
    """

    order_instructions: str = dspy.InputField(desc="These are the instructions to use in order to create the order.")
    previous_widget: Widget = dspy.InputField(desc="This was the previous widget that was placed in the order and was rejected. If it is None then this is the first attempt to place an order.")
    order_validator_feedback: list[str] = dspy.InputField(desc="These feedback from the order validator about why the order was rejected previously, if applicable.")

    order_placement_instructions: str = dspy.OutputField()

################################################################
# Function to place order, it has the validation rules in it

def place_order(order: WidgetOrder) -> str:
    """
    Places orders for widgets. Returns 'Valid' if the order is valid, otherwise returns a string with the reason for rejection.

    Return: 
    - str: Returns 'Valid' if the order is valid, otherwise returns a string with the reason for rejection.
    """

    # Rule 1: 
    if order.widget.shape.lower() == "round" and order.widget.weightInKg <= 5:
        msg = f"""**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`={order.widget.weightInKg} 
        Try again with a different shape or weight."""
        md(msg)
        return msg
    
    # Rule 2:
    dividor = 4
    if (order.widget.weightInKg % dividor) != 0:
        msg = f"**Order rejected**: Widget weight is {order.widget.weightInKg}kg, this must divisible by {dividor}."
        md(msg)
        return msg
    
    # Rule 3: 
    if order.widget.colour.lower() == "red":
        msg = "**Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour."
        md(msg)
        return msg

    return "Valid"

/tmp/ipykernel_35665/2363346066.py:6: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  @validator("weightInKg")


In [None]:
create_order = dspy.ChainOfThought(WidgetOrder)
create_order_instructions = dspy.ChainOfThought(WidgetOrderInstructionCreator)

################################################################
# Place order 

loopCount = 0

instructions = "Create a red widget that is round and 3kg in weight." # Will hit the first rule
#instructions = "Create a widget that is a triangle and 3kg in weight." # Miss first rule and hit divisor rule
widget = None
order_validator_feedback = []

md("**Original instructions**: ", instructions)

while loopCount < 5:
    md("# loopCount: ", loopCount)
    
    ##########################################################
    md("## Create order instructions ")
    md('instructions: ', instructions)
    md('order_validator_feedback: ', order_validator_feedback)
    order_instructions = create_order_instructions(previous_widget=widget, order_instructions=instructions, order_validator_feedback=order_validator_feedback)
    md("order_instructions created: ", order_instructions)
    instructions = order_instructions.order_placement_instructions
    md("**Order instructions**: ", instructions)
    
    ##########################################################
    md("## Create order object")
    
    md("order_placement_instructions: ", instructions)
    order = create_order(order_placement_instructions=instructions)
    md("**Order object created**: ", order)
    md('order.widget: ***', order.widget, '***')
    
    ##########################################################
    md("## Place order & validate")
    response = place_order(order=order)
    
    if response == "Valid":
        md("", "### Order placed successfully")
        break
    
    order_validator_feedback.append(response)
    md("**Cumulative validator feedback**: ")
    msgs = []
    for i in range(len(order_validator_feedback)):
        #msgs.append(f"* {i+1}. {order_validator_feedback[i]}")
        md(f"* {i+1}: {order_validator_feedback[i]}")
        
    #md("".join(msgs)) 

    loopCount += 1

**Original instructions**: Create a red widget that is round and 3kg in weight.

# loopCount: 0

## Create order instructions 

instructions: Create a red widget that is round and 3kg in weight.

order_validator_feedback: []

order_instructions created: Prediction(  
    reasoning='This is the first attempt to place the order, so there is no feedback or previous widget to consider. The instructions are clear: the widget must be red, have a round shape, and weigh 3kg. All requirements are straightforward and should be specified explicitly in the placement instructions.',  
    order_placement_instructions='Order a widget with the following specifications:\n- Color: Red\n- Shape: Round\n- Weight: 3kg'  
)

**Order instructions**: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 3kg

## Create order object

order_placement_instructions: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 3kg

**Order object created**: Prediction(  
    reasoning='The order placement instructions specify that the widget must be red in color, round in shape, and weigh 3kg. I will assign the `colour` property as "Red", the `shape` property as "round", and the `weightInKg` property as 3. All the required fields are matched exactly to the instructions. The shape value "round" is one of the allowed options ("round", "square", "triangle"), and the weight is a positive integer.',  
    widget=Widget(shape='round', weightInKg=3, colour='Red')  
)

order.widget: ***shape='round' weightInKg=3 colour='Red'***

## Place order & validate

**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight.

**Cumulative validator feedback**: 

1. **Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight. 

# loopCount: 1

## Create order instructions 

instructions: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 3kg

order_validator_feedback: ['**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3 \n        Try again with a different shape or weight.']

order_instructions created: Prediction(  
    reasoning='The previous order was rejected because a round widget cannot have a weight less than or equal to 5kg. The original order specified a round shape and a weight of 3kg, which violates this rule. To successfully place the order, I must either increase the weight to above 5kg or choose a different shape while maintaining the other specifications.',  
    order_placement_instructions='Order a widget with the following specifications:\n- Color: Red\n- Shape: Round\n- Weight: 6kg'  
)

**Order instructions**: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 6kg

## Create order object

order_placement_instructions: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 6kg

**Order object created**: Prediction(  
    reasoning='The instructions specify the following: color should be Red, shape should be Round, and weight should be 6kg. I will make sure that each of these values is matched exactly in the widget. The color will be set to "Red", the shape will be "round" (matching the enum options: round, square, triangle), and the weightInKg will be 6 (a positive integer).',  
    widget=Widget(shape='round', weightInKg=6, colour='Red')  
)

order.widget: ***shape='round' weightInKg=6 colour='Red'***

## Place order & validate

**Order rejected**: Widget weight is 6kg, this must divisible by 4.

**Cumulative validator feedback**: 

1. **Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight. 1. **Order rejected**: Widget weight is 6kg, this must divisible by 4. 

# loopCount: 2

## Create order instructions 

instructions: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 6kg

order_validator_feedback: ['**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3 \n        Try again with a different shape or weight.', '**Order rejected**: Widget weight is 6kg, this must divisible by 4.']

order_instructions created: Prediction(  
    reasoning="The feedback indicates two issues with previous order attempts: (1) Round widgets must weigh more than 5kg (which 6kg satisfies), and (2) the weight for the widget must be divisible by 4. The current requested weight is 6kg, which isn't divisible by 4; the closest valid weights above 5kg and divisible by 4 are 8kg or 12kg. Therefore, to meet all requirements, the order should specify a weight of 8kg.",  
    order_placement_instructions='Order a widget with the following specifications:\n- Color: Red\n- Shape: Round\n- Weight: 8kg'  
)

**Order instructions**: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 8kg

## Create order object

order_placement_instructions: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 8kg

**Order object created**: Prediction(  
    reasoning='The order specifies a widget that is red in color, round in shape, and weighs 8kg. I have verified each instruction:\n- The color should be set to "Red".\n- The shape should be "round", which is among the allowed shapes ("round", "square", "triangle").\n- The weight should be 8, which is a positive integer.\nAll specifications are clearly stated and match the schema.',  
    widget=Widget(shape='round', weightInKg=8, colour='Red')  
)

order.widget: ***shape='round' weightInKg=8 colour='Red'***

## Place order & validate

**Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour.

**Cumulative validator feedback**: 

1. **Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3   
        Try again with a different shape or weight. 1. **Order rejected**: Widget weight is 6kg, this must divisible by 4. 1. **Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour. 

# loopCount: 3

## Create order instructions 

instructions: Order a widget with the following specifications:  
- Color: Red  
- Shape: Round  
- Weight: 8kg

order_validator_feedback: ['**Order rejected**: Round widgets cannot weight less than or equal to 5kg. You tried to place an order with a `weightInKg`=3 \n        Try again with a different shape or weight.', '**Order rejected**: Widget weight is 6kg, this must divisible by 4.', "**Order rejected**: The `colour` chosen is red. We cannot make 'red' widgets at the moment, choose a different colour."]

order_instructions created: Prediction(  
    reasoning='There are three main issues noted in the feedback:\n1. Red widgets cannot be ordered at this time, so a different color must be chosen.\n2. Round widgets must weigh more than 5kg, and the weight must be divisible by 4.\n3. Previous attempts used weights of 3kg and 6kg, but 6 is not divisible by 4, and 3kg is too light.\nTo meet all requirements, I need to select a weight greater than 5kg that is also divisible by 4 (the next possible value is 8kg, which matches the original instructions), and choose a color other than red.',  
    order_placement_instructions='Order a widget with the following specifications:\n- Color: Blue (or any available color except red)\n- Shape: Round\n- Weight: 8kg'  
)

**Order instructions**: Order a widget with the following specifications:  
- Color: Blue (or any available color except red)  
- Shape: Round  
- Weight: 8kg

## Create order object

order_placement_instructions: Order a widget with the following specifications:  
- Color: Blue (or any available color except red)  
- Shape: Round  
- Weight: 8kg

**Order object created**: Prediction(  
    reasoning='The instructions specify a blue widget, or any color except red if blue is unavailable. Since blue is available, I will choose blue. The shape must be round, and the weight must be 8kg, which is a positive integer. All parameters are met exactly as requested.',  
    widget=Widget(shape='round', weightInKg=8, colour='blue')  
)

order.widget: ***shape='round' weightInKg=8 colour='blue'***

## Place order & validate



### Order placed successfully