In [92]:
from pydantic import BaseModel, ValidationError, Field, EmailStr
from typing import List, Literal, Optional
from datetime import date
from dotenv import load_dotenv

import openai
import json
import os
import sys

try: 
    dotenv_loaded = load_dotenv()
    if not dotenv_loaded:
        print("Warning: no .env file found or loaded.")
except Exception as e:
    print(f"Error loading .env file: {e}")
    sys.exit(1)
    
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("Error: OPENAI_API_KEY not found in environment variables")
    print("Please create a .env file with: OPENAI_API_KEY=your_api_key_here")
    sys.exit(1)
    
try: 
    client = openai.OpenAI(api_key=api_key)
    print("OpenAI client successfully initialized.")
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    sys.exit(1)

OpenAI client successfully initialized.


In [93]:
class UserInput(BaseModel):
    name: str
    email: str
    query: str
    order_id: Optional[int] = Field(
        None, 
        description="5 digit (not zero) Order ID if available",
        lower_bound = 10000,
        upper_bound = 99999,
    )
    purchase_date: Optional[date] = None


user_input_json = """
{
    "name": "John Doe",
    "email": "john.doe@example.com",
    "query": "I need assistance retrieving my order invoice",
    "order_id": null,
    "purchase_date": null
}
"""

user_input = UserInput.model_validate_json(user_input_json)

In [94]:
class UserQuery(UserInput):
    priority: str = Field(
        ..., description="Priority level: low, medium, high",
    )
    category: Literal[
        'refund_request',
        'information_request',
        'other'
    ]
    is_compliant: bool = Field(
        ..., description="Whether the query is compliant or not",
    )
    tags: List[str] = Field(
        description="List of tags associated with the query",
    )
    


user_input_json = """
    {
        "name": "John Doe",
        "email": "john.doe@example.com",
        "query": "I need assistance retrieving my order invoice",
        "order_id": null,
        "purchase_date": null,
        "priority": "medium",
        "category": "refund_request",
        "is_compliant": true,
        "tags": ["monitor", "support", "exchange"] 
    }
"""

user_input = UserInput.model_validate_json(user_input_json)

In [95]:
model_schema = json.dumps(UserQuery.model_json_schema(), indent = 2)
print(f"model schema:\n{model_schema}")

model schema:
{
  "properties": {
    "name": {
      "title": "Name",
      "type": "string"
    },
    "email": {
      "title": "Email",
      "type": "string"
    },
    "query": {
      "title": "Query",
      "type": "string"
    },
    "order_id": {
      "anyOf": [
        {
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "5 digit (not zero) Order ID if available",
      "lower_bound": 10000,
      "title": "Order Id",
      "upper_bound": 99999
    },
    "purchase_date": {
      "anyOf": [
        {
          "format": "date",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Purchase Date"
    },
    "priority": {
      "description": "Priority level: low, medium, high",
      "title": "Priority",
      "type": "string"
    },
    "category": {
      "enum": [
        "refund_request",
        "information_r

In [96]:
def call_llm(prompt: str, model: str = "gpt-4o") -> str:
    response = client.chat.completions.create(
        model = model,
        messages = [{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [97]:
def validate_with_model(data_model, llm_response):
    try:
        validated_data = data_model.model_validate_json(llm_response)
        print("Data validation successful.")
        return validated_data, None
    except ValidationError as e:
        print(f"Validation failed: {e}")
        error_details = []
        for error in e.errors():
            field = error['loc'][0] if error['loc'] else 'unknown'
            msg = error['msg']
            error_details.append(f"Field '{field}': {msg}")
        
        error_message = f"JSON validation errors:\n" + "\n".join(error_details) + "\n\nPlease fix these issues and return valid JSON only."
        return None, error_message
    except Exception as e:
        error_message = f"JSON parsing error: {e}\nPlease return valid JSON only."
        return None, error_message

In [98]:
user_input_with_query_schema = f"""
Please analyze the following user query:
{user_input.model_dump_json(indent=2)}

Return your analysis as a JSON object matching the following schema:
{UserQuery.model_json_schema()}

NOTE: Return ONLY the raw JSON object. Do NOT wrap it in markdown code blocks or any other formatting. Do NOT include any explanations, comments, or additional text before or after the JSON.

Example of correct format:
{{"name": "value", "field": "value"}}

NOT this:
```json
{{"name": "value"}}
```
"""

In [99]:
def validate_llm_response(prompt, data_model, n_retries=5, model="gpt-4o"):
    current_prompt = prompt
    response_content = call_llm(prompt, model)
    
    for attempt in range(n_retries + 1):
        validate_data, validated_error = validate_with_model(data_model, response_content)
        if validated_error:
            if attempt < n_retries:
                print(f"Attempt {attempt + 1} failed. Retrying...")
                current_prompt = current_prompt + "\n" + validated_error
                response_content = call_llm(current_prompt, model)
            else:
                print(f"All {n_retries + 1} attempts failed. Returning None.")
                return None, validated_error
        else:
            print(f"Validation successful on attempt {attempt + 1}")
            return validate_data, None
    
    # If we get here, all attempts failed
    return None, "Maximum retries exceeded"

In [100]:
final_analysis, error = validate_llm_response(
    user_input_with_query_schema, UserQuery, 3
)

Data validation successful.
Validation successful on attempt 1
