Prompting for structure and setting up a retry method. 
Learning goals:
1. Define structured data models for LLM responses
2. Build robust retry mechanisms for validation errors
3. Create resuable functions for LLM interactions

In [1]:
# Import necessary libraries
from pydantic import BaseModel, ValidationError, Field, EmailStr
from typing import List, Literal, Optional
import json
from datetime import date
from dotenv import load_dotenv
#import openai # very limited use from openai
import google.generativeai as genai

In [2]:
# load environment variables for API access
#load_dotenv(dotenv_path="C:\\Users\\yawen\\Documents\\Learning\\Pydantic\\openai.env")
# Initialize OpenAI client for API calls
#client = openai.OpenAI()
# Load environment variables for API access
load_dotenv(dotenv_path="C:\\Users\\yawen\\Documents\\Learning\\Pydantic\\geminiai.env")

import os
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)
#print(f"API key loaded: {api_key}")

In [3]:
# Defome some sample input data
# Define a JSON string representing user data

user_input_json = """
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_number": null,
    "purchase_date": null
}
"""

In [4]:
# Define Userinput Data Model
class UserInput(BaseModel):
    name: str
    email: EmailStr
    query: str
    order_id: Optional[int] = Field(
        None,
        description="5-digit order number (cannot start with 0)",
        ge=10000,
        le=99999
    )
    purchase_date: Optional[date] = None

In [5]:
# Create UserInput instance from JSON data
user_input = UserInput.model_validate_json(user_input_json)

Create a mew data model called CustomerQuery

In [6]:
## Define the customerquery model that inherits from UserInput
class CustomerQuery(UserInput):
    priority: str = Field(
        ..., description="Priority level: low, medium, high"
    )
    category: Literal[
        'refund_request', 'infomration_request', 'other'
    ] = Field(..., description="Query category")
    is_complaint: bool = Field(
        ..., description="whether this is a complaint"
    )
    tags: List[str] = Field(..., description="Relevant keyword tags")
    
    

In [7]:
# Create a sample model instance with generic example data to guide LLM.
# This ensures the example differs from the actual user query.

example_response_structure = f""" {{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen broken",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"]
}}"""

In [8]:
# Create a prompt with user data and expected JSON structure
prompt = f""" 
please analyze this user query \n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching this exact structure and data types:
{example_response_structure}

Reponsd ONLY with valid JSON. Do not include any explanations or other text or formatting 
before or after the JSON object
"""

print(prompt)

 
please analyze this user query 
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure and data types:
 {
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen broken",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"]
}

Reponsd ONLY with valid JSON. Do not include any explanations or other text or formatting 
before or after the JSON object



In [9]:
# Define a functinon to call the LLM
def call_llm_google(prompt, model_name="gemini-2.5-pro"):
    """
    Calls a Google Generative AI model with the given prompt

    Args:
    prompt (str): The user's input prompt.
    model_name (str): The name of the google Generative AI model to use.
    Defaults to "gemini-2.5-pro", other options include "gemini-pro-vision", "gemini-1.5-flash", etc.

    Returns:
        str: The generated content from the model
    """
    try:
        # Create a genai model
        model = genai.GenerativeModel(model_name)
        # Send the prompt to the model
        response = model.generate_content(prompt)

        # Extract the text content from the response
        return response.text

    except Exception as e:
        print(f"An Error occured: {e}")
        return f"Error: Could not generate response. {e}"

In [10]:
response_content = call_llm_google(prompt)
print(response_content)

```json
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "low",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "account", "login", "support"]
}
```


Validate the LLM output using your CustomerQuery model

Note: the following cell will produce a validation error. This is expected. You can simply proceed with the rest of the notebook as cells below do not depend on this cell.

In [11]:
# Attempt to parse the response into CustomerQuery model
valid_data = CustomerQuery.model_validate_json(response_content)

ValidationError: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...in", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

# Define a function for error handling

In [17]:
# Define a function to validate an LLM response
def validate_with_model(data_model, llm_response):
    try:
        validated_data = data_model.model_validate_json(llm_response)
        print("Data Validation Successful")
        print(validated_data.model_dump_json(indent=2))
        return validated_data, None

    except ValidationError as e:
        print(f"Error validating data: {e}")
        error_msg = (
            f"This response generated a validation error: {e}"
        )
        return None, error_msg
        

In [18]:
# Test validation function with the LLM response
validated_data, validation_error = validate_with_model(
    CustomerQuery, response_content
)

Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...in", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid


In [19]:
# Define a function to create a retry prompt with error feedback 

def create_retry_prompt(
    original_prompt, original_response, error_message
):
    retry_prompt = f"""
    This is a request to fix an error in the structure of an llm_response
    Here is the original request:
    <original_prompt>
    {original_prompt}
    </original_prompt>

    Here is the original llm_response
    <llm_response>
    {original_response}
    </llm_response>

    This response generated an error:
    <error_message>
    {error_message}
    </error_message>

    Compare the error message and the llm_response and identify what needs to be fixed or removed
    in the llm_response to resolve this error.

    Response ONLY with valid JSON. Do not include any explanations or other text or formatting before or
    after the JSON string
    
    """
    return retry_prompt

In [20]:
# Create a retry prompt for validation errors

validation_retry_prompt = create_retry_prompt(
    original_prompt=prompt,
    original_response=response_content,
    error_message=validation_error
)

print(validation_retry_prompt)


    This is a request to fix an error in the structure of an llm_response
    Here is the original request:
    <original_prompt>
     
please analyze this user query 
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure and data types:
 {
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen broken",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"]
}

Reponsd ONLY with valid JSON. Do not include any explanations or other text or formatting 
before or after the JSON object

    </original_prompt>

    Here is the original llm_response
    <llm_response>
    ```json
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "

In [21]:
# Call the LLM with the validation retry prompt
validation_retry_response = call_llm_google(validation_retry_prompt)
print(validation_retry_response)

```json
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "low",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "account", "login", "support"]
}
```


In [22]:
# Attempt to validate retry response from LLM
validated_data, validation_error = validate_with_model(
    CustomerQuery, validation_retry_response
)

Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...in", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid


In [23]:
# Create a second retry prompt for validation errors
second_validation_retry_prompt = create_retry_prompt(
    original_prompt=validation_retry_prompt,
    original_response=validation_retry_response,
    error_message=validation_error
)

print(second_validation_retry_prompt)


    This is a request to fix an error in the structure of an llm_response
    Here is the original request:
    <original_prompt>
    
    This is a request to fix an error in the structure of an llm_response
    Here is the original request:
    <original_prompt>
     
please analyze this user query 
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure and data types:
 {
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen broken",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"]
}

Reponsd ONLY with valid JSON. Do not include any explanations or other text or formatting 
before or after the JSON object

    </original_prompt>

  

In [25]:
# Call the LLM with the second validation retry prompt
second_validation_retry_response = call_llm_google(
    second_validation_retry_prompt
)
print(second_validation_retry_response)

```json
{
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "low",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "account", "login", "support"]
}
```


In [34]:
# Define a funtion to handle multiple retries in a feedbacck loop

def validate_llm_response(
    prompt, data_model, n_retry=5, model="gemini-2.5-pro"
):
    # Initial LLM Call
    response_content = call_llm_google(prompt)
    current_prompt = prompt

    # Try to validate with the model

    for attempt in range(n_retry + 1):
        
        validated_data, validation_error = validate_with_model(
            data_model, response_content
        )

        if validation_error:

            if attempt < n_retry:
                print(f"retry {attempt} of {n_retry} failed, trying again... ")
            else:
                
                print(f"Max retries reached. Last error: {validation_error}")

                return None, (
                     f"Max retries reached. Last error: {validation_error}"
                )

            validation_retry_prompt = create_retry_prompt(
                original_prompt=current_prompt,
                original_response=response_content,
                error_message=validation_error
                )            
            response_content = call_llm_google(
                validation_retry_prompt
            )
            current_prompt = validation_retry_prompt
            continue

        # if you get here, both parsing and validation succeeded
        return validated_data, None

            


In [35]:
# Test your commplete solution with the original prompt
validated_data, error = validate_llm_response(
    prompt, CustomerQuery
)

Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...in", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 0 of 5 failed, trying again... 
Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...in", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 1 of 5 failed, trying again... 
Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...in", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 2 of 5 failed, trying again... 
Erro

In [36]:
# Investigate the model_json_schema for CustomerQuery
data_model_schema = json.dumps(
    CustomerQuery.model_json_schema(), indent=2
)
print(data_model_schema)

{
  "properties": {
    "name": {
      "title": "Name",
      "type": "string"
    },
    "email": {
      "format": "email",
      "title": "Email",
      "type": "string"
    },
    "query": {
      "title": "Query",
      "type": "string"
    },
    "order_id": {
      "anyOf": [
        {
          "maximum": 99999,
          "minimum": 10000,
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "5-digit order number (cannot start with 0)",
      "title": "Order Id"
    },
    "purchase_date": {
      "anyOf": [
        {
          "format": "date",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Purchase Date"
    },
    "priority": {
      "description": "Priority level: low, medium, high",
      "title": "Priority",
      "type": "string"
    },
    "category": {
      "description": "Query category",
      "enum"

In [37]:
# Print the original prompt from above
print(prompt)

 
please analyze this user query 
 {
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure and data types:
 {
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen broken",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"]
}

Reponsd ONLY with valid JSON. Do not include any explanations or other text or formatting 
before or after the JSON object



In [38]:
# Create new prompt with user input and model_json_schema
prompt = f"""
Please analyze this user query\n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching the following schema:
{data_model_schema}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.
"""

In [39]:
# Run your validate_llm_response function with the new prompt
final_analysis, error = validate_llm_response(
    prompt, CustomerQuery
)

Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "name": "J...   "login"\n  ]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 0 of 5 failed, trying again... 
Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "name": "J...   "login"\n  ]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 1 of 5 failed, trying again... 
Error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "name": "J...   "login"\n  ]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 2 of 5 failed, trying again... 
Erro

In [14]:
# Define a functiona lto call the LLM
# def call_llm(prompt, model="gpt-4o"):
   # response = client.chat.completions.create(
       # model=model,
        #messages=[{"role": "user", "content": prompt}]
    #)
   # return response.choices[0].message.content