# 3: Prompting for structure and setting up a retry method 

- Define structured data models for LLM responses
- Build robust retry mechanisms for validation errors
- Create reusable functions for LLM interactions

---

##### Import packages and initialize the OpenAI client

In [3]:
from pydantic import BaseModel, ValidationError, Field, EmailStr
from typing import List, Literal, Optional

import os
import json
from datetime import date
import openai


from dotenv import load_dotenv
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [4]:
client = openai.OpenAI()

##### Define sample input data (JSON)

In [5]:
# Define a JSON string representing user input
user_input_json = '''
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_number": null,
    "purchase_date": null
}
'''

##### Define UserInput Data Model

In [6]:
class UserInput(BaseModel):
    name: str
    email: EmailStr
    query: str
    order_id: Optional[int] = Field(
        None,
        description="5-digit order number (cannot start with 0)",
        ge=10000,
        le=99999
    )
    purchase_date: Optional[date] = None    


##### Create an instance of user input

In [7]:
try:
    user_input = UserInput.model_validate_json(user_input_json)
except ValidationError as e:
    print(f"Validation errors occured:")
    for error in e.errors():
        print(f"{error['loc'][0]}: {error['msg']}")     


##### Create a new Data Model called CustomerQuery

In [8]:
class CustomerQuery(UserInput):
    priority: str = Field(
        default = ...,
        description = "Priority Level: low, medium, high"
    )
    category: Literal["refund_request", "information_request", "other"] = Field(
        default = ...,
        description = "Query Category"
    )
    is_complaint: bool = Field(
        default = ...,
        description = "Whether this is a complaint"
    )
    tags: List[str] = Field(
        default = ...,
        description = "Relevant keyword tags"
    )

#### Construct an example output

In [9]:
# Create a prompt with generic example data to guide LLM.
example_response_structure = f"""{{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}}"""

#### Create prompt with user data and expected JSON structure

In [10]:
# Create prompt with user data and expected JSON structure
prompt = f"""
Please analyze this user query\n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{example_response_structure}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.
"""

print(prompt)


Please analyze this user query
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.



#### Define a function to call an LLM and try it with your prompt

In [11]:
# Define a function to call the LLM
def call_llm(prompt, model = "gpt-4o"):
    response = client.chat.completions.create(
        model = model,
        messages = [
            {
                "role": "user",
                "content": prompt
            }
        ]
    )
    return response.choices[0].message.content

In [12]:
# Get response from LLM
response_content = call_llm(prompt)
print(response_content)

```json
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "high",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "support", "account"]
}
```


#### Validate the LLM output using your CustomerQuery model

##### Attempt to parse the response into CustomerQuery model

In [13]:
valid_data = CustomerQuery.model_validate_json(response_content)

ValidationError: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

#### Define a function for error handling

In [14]:
def validate_with_model(data_model, llm_response):
    try:
        validated_data = data_model.model_validate_json(llm_response)
        print("data validation successful")
        print(validated_data.model_dump_json(indent = 2))
        return validated_data, None
    except ValidationError as e:
        print(f"Error validating data:{e}")
        error_message = (
            f"This response generated a validation error: {e}"
        )
        return None, error_message
        

#### Test the validation function with LLM Model

In [15]:
# Test your validation function with the LLM response
validated_data, validation_error = validate_with_model(
    CustomerQuery, response_content
)

Error validating data:1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid


#### Define a function to create a retry prompt including error details

In [16]:
# Define a function to create a retry prompt with error feedback
def create_retry_prompt(
    original_prompt, original_response, error_message
):
    retry_prompt = f"""
This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>
{original_prompt}
</original_prompt>

Here is the original llm_response:
<llm_response>
{original_response}
</llm_response>

This response generated an error: 
<error_message>
{error_message}
</error_message>

Compare the error message and the llm_response and identify what 
needs to be fixed or removed
in the llm_response to resolve this error. 

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON string.
"""
    return retry_prompt

#### Create a retry prompt for validation errors

In [17]:
# Create a retry prompt for validation errors
validation_retry_prompt = create_retry_prompt(
    original_prompt=prompt,
    original_response=response_content,
    error_message=validation_error
)

print(validation_retry_prompt)


This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>

Please analyze this user query
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.

</original_prompt>

Here is the original llm_response:
<llm_response>
```json
{
    "name": "Joe User",
    "email": "joe.user@example.co

#### Call the LLM with RETRY PROMPT

In [18]:
# Call the LLM with the validation retry prompt
validation_retry_response = call_llm(validation_retry_prompt)
print(validation_retry_response)

```json
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "high",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "support", "account"]
}
```


In [19]:
# Attempt to validate retry response from LLM
validated_data, validation_error = validate_with_model(
    CustomerQuery, validation_retry_response
)

Error validating data:1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid


#### Create a second retry prompt

In [20]:
# Create a second retry prompt for validation errors
second_validation_retry_prompt = create_retry_prompt(
    original_prompt=validation_retry_prompt,
    original_response=validation_retry_response,
    error_message=validation_error
)

print(second_validation_retry_prompt)


This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>

This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>

Please analyze this user query
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.

</original_prompt

#### Call the LLM with the second validation retry prompt

In [21]:
# Call the LLM with the second validation retry prompt
second_validation_retry_response = call_llm(
    second_validation_retry_prompt
)
print(second_validation_retry_response)

{"name": "Joe User", "email": "joe.user@example.com", "query": "I forgot my password.", "order_id": null, "purchase_date": null, "priority": "high", "category": "account_issue", "is_complaint": false, "tags": ["password", "support", "account"]}


#### Define a function to handle multiple retries in a feedback loop

In [22]:
def validate_llm_response(
        prompt,
        data_model,
        n_retry = 5,
        model = "gpt-4o"
):
    # Initial LLM call
    response_content = call_llm(prompt, model=model)
    current_prompt = prompt

    # Try to validate with the model
    # attempt : 0 = initial, 1  = first retry ...
    for attempt in range(n_retry + 1):
        validated_data, validation_error = validate_with_model(data_model, response_content)
        
        if validation_error:
            if attempt < n_retry:
                print(f"Retry {attempt} of {n_retry} failed, trying again")
            else:
                print(f"Max retries reached. Last Error: {validation_error}")
                return None
            
            validation_retry_prompt = create_retry_prompt(
                original_prompt=current_prompt,
                original_response= response_content,
                error_message= validation_error
            )
            response_content = call_llm(
                validation_retry_prompt, model = model
            )
            current_prompt = validation_retry_prompt
            continue # With continue the code goes into the loop again and does not execute the below return
        return validated_data, None

#### Test the complete solution

In [23]:
validated_data, error = validate_llm_response(
    prompt, CustomerQuery
)

Error validating data:1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...et", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
Retry 0 of 5 failed, trying again
Error validating data:1 validation error for CustomerQuery
category
  Input should be 'refund_request', 'information_request' or 'other' [type=literal_error, input_value='account_issue', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/literal_error
Retry 1 of 5 failed, trying again
data validation successful
{
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "high",
  "category": "other",
  "is_complaint": false,
  "tags": [
    "password_reset",
    "support"
  ]
}


#### Investigate the model_json_schema for CustomerQuery

In [24]:
data_model_schema = json.dumps(
    CustomerQuery.model_json_schema(), indent=2
)
print(data_model_schema)

{
  "properties": {
    "name": {
      "title": "Name",
      "type": "string"
    },
    "email": {
      "format": "email",
      "title": "Email",
      "type": "string"
    },
    "query": {
      "title": "Query",
      "type": "string"
    },
    "order_id": {
      "anyOf": [
        {
          "maximum": 99999,
          "minimum": 10000,
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "5-digit order number (cannot start with 0)",
      "title": "Order Id"
    },
    "purchase_date": {
      "anyOf": [
        {
          "format": "date",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Purchase Date"
    },
    "priority": {
      "description": "Priority Level: low, medium, high",
      "title": "Priority",
      "type": "string"
    },
    "category": {
      "description": "Query Category",
      "enum"

#### Construct a new prompt using the JSON schema of your data model -  better way to provide schema instead of an example

In [25]:
# Create new prompt with user input and model_json_schema
prompt = f"""
Please analyze this user query\n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching the following schema:
{data_model_schema}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.
"""

In [26]:
# Run your validate_llm_response function with the new prompt
final_analysis, error = validate_llm_response(
    prompt, CustomerQuery
)

Error validating data:1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "name": "J..._password"\n  ]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
Retry 0 of 5 failed, trying again
data validation successful
{
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "high",
  "category": "other",
  "is_complaint": false,
  "tags": [
    "password",
    "account_access",
    "forgot_password"
  ]
}
