In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
from sklearn.linear_model import SGDRegressor, LinearRegression

In [None]:
model =  SGDRegressor()

In [None]:
# model.fit(X_train, y_train)

In [None]:
# prediction = model.predict(X_test)

In [None]:
# mean_squared_error(y_test, p)

# Using Pydantic Models for Structured LLM Output

In the Session 6, we implemented retry mechanisms to handle validation errors, which mimics what some structured output frameworks are doing behind the scenes when they handle validation for you.

In this file, we'll experiment with passing Pydantic model directly in our API call using different frameworks and LLM providers.

We'll be able to:
- Use Pydantic models directly in our API calls to LLMs
- Reliably receive a properly structured response using a variety of different frameworks and LLM providers.

---

In [1]:
# Import packages
from pydantic import BaseModel, Field, EmailStr
from typing import List, Literal, Optional

import os
import openai
from openai import OpenAI
import instructor # wrapper for many llms providers in the api call to use our pydantic data model to get the structured response. You pass the pydantic data model to instructor, and it extracts the Json model schema, and the consturct a prompt and if there is any issue in the response, it will do series of retires in order to get a response, we are looking for
import anthropic # claude ai
from dotenv import load_dotenv
from datetime import date

In [2]:
# Load environment variables
load_dotenv(".env", override=True)

True

In [3]:
openai.api_key = os.getenv("OPENAI_API_KEY")

### Define your Pydantic models for user input and LLM output

In [4]:
# Define the UserInput model for customer support queries
class UserInput(BaseModel):
    name: str
    email: EmailStr
    query: str
    order_id: Optional[int] = Field(
        None,
        description="5-digit order number (cannot start with 0)",
        ge=10000,
        le=99999
    )
    purchase_date: Optional[date] = None

# Define the CustomerQuery model that inherits from UserInput
class CustomerQuery(UserInput):
    priority: str = Field(
        ..., description="Priority level: low, medium, high"
    )
    category: Literal[
        'refund_request', 'information_request', 'other'
    ] = Field(..., description="Query category")
    is_complaint: bool = Field(
        ..., description="Whether this is a complaint"
    )
    tags: List[str] = Field(..., description="Relevant keyword tags")

### Provide sample input and validate it using your model

In [21]:
# Define your input data as a JSON string
user_input_json = '''{
    "name": "Waqas",
    "email": "waqas@g.c",
    "query": "I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.",
    "order_id": 12345,
    "purchase_date": "2025-12-31"
}'''

In [22]:
# Validate the user_input_json by creating a UserInput instance
user_input = UserInput.model_validate_json(user_input_json)
# print(user_input)
print(user_input.model_dump_json(indent=2))

{
  "name": "Waqas",
  "email": "waqas@g.c",
  "query": "I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.",
  "order_id": 12345,
  "purchase_date": "2025-12-31"
}


### Build a prompt and call the Anthropic API with the instructor package for structured output

In [23]:
prompt =  f"""Analyze the following customer query 
    
    {user_input} 
    
and provide a structured response."""


print(prompt)

Analyze the following customer query 

    name='Waqas' email='waqas@g.c' query='I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.' order_id=12345 purchase_date=datetime.date(2025, 12, 31) 

and provide a structured response.


In [24]:
# Use Anthropic with Instructor to get structured output
# client = instructor.from_anthropic(anthropic.Anthropic())

# response = client.messages.create(
#     model="claude-3-7-sonnet-latest",  
#     max_tokens=1024,
#     messages=[
#         {
#             "role": "user", 
#             "content": prompt
#         }
#     ],
#     response_model=CustomerQuery  
# )

# It will return the instance of CCustomerQuery data model. No need for extra validation

In [25]:
# # Inspect the returned structured data
# print(type(response))
# print(response.model_dump_json(indent=2))

### Use OpenAI's structured output API with your Pydantic schema

In [26]:
client = OpenAI()

In [27]:
# Initialize OpenAI client and call passing CustomerQuery in your API call

response = client.chat.completions.parse(model="gpt-4o",
    messages=[{"role": "user", "content": prompt}],
    response_format=CustomerQuery,
    max_completion_tokens= 1024
)
response_content = response.choices[0].message.content
# print(type(response_content))
print(response_content)

# You are not getting an instance of a  data model directly but you are getting back a Json 
# What openai is doing is a constraint generation indicated that you are looking for Json format. So it is not guaranteed to be a valid data model but guaranteed to be a valid Json String

{"name":"Waqas","email":"waqas@g.c","query":"I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.","order_id":12345,"purchase_date":null,"priority":"high","category":"refund_request","is_complaint":true,"tags":["replacement","damaged item","urgent","repeat issue"]}


In [31]:
print(type(response_content))

<class 'str'>


### Additional advanced usage and inspection

In [28]:
# Validate the repsonse you got from the LLM. This one extra step using chat completions
valid_data = CustomerQuery.model_validate_json(response_content)
print(type(valid_data))
print(valid_data.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Waqas",
  "email": "waqas@g.c",
  "query": "I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.",
  "order_id": 12345,
  "purchase_date": null,
  "priority": "high",
  "category": "refund_request",
  "is_complaint": true,
  "tags": [
    "replacement",
    "damaged item",
    "urgent",
    "repeat issue"
  ]
}


In [29]:
# Try the responses API from OpenAI
response = client.responses.parse(
    model="gpt-4o",
    input=[{"role": "user", "content": prompt}],
    text_format=CustomerQuery
)

print(type(response))

<class 'openai.types.responses.parsed_response.ParsedResponse[CustomerQuery]'>


In [33]:
response

ParsedResponse[CustomerQuery](id='resp_019f73484270e0ff0068f34539d3e481a2a271d8bef352004e', created_at=1760773434.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-2024-08-06', object='response', output=[ParsedResponseOutputMessage[CustomerQuery](id='msg_019f73484270e0ff0068f3453ae21481a2af266701b2111c04', content=[ParsedResponseOutputText[CustomerQuery](annotations=[], text='{"name":"Waqas","email":"waqas@g.c","query":"I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.","order_id":12345,"purchase_date":"2025-12-31","priority":"high","category":"refund_request","is_complaint":true,"tags":["replacement","damaged_item","urgent"]}', type='output_text', logprobs=[], parsed=CustomerQuery(name='Waqas', email='waqas@g.c', query='I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASA

In [32]:
# Investigate class inheritance structure of the OpenAI response
def print_class_inheritence(llm_response):
    for cls in type(llm_response).mro(): # method resolution in Python, allow you to printout inheritance structure of that response
        print(f"{cls.__module__}.{cls.__name__}")

print_class_inheritence(response) # response coming out from openai is itself a pydantic model

# What we got in this case is an instance of our own pytdantic model nested inside another pydantic model from openai

# That being said, most llm providers are doing data validation at their own end

openai.types.responses.parsed_response.ParsedResponse[CustomerQuery]
openai.types.responses.parsed_response.ParsedResponse
openai.types.responses.response.Response
openai._models.GenericModel
openai._compat.GenericModel
openai.BaseModel
pydantic.main.BaseModel
typing.Generic
builtins.object


In [35]:
# Print the response type and content 
print(type(response.output_parsed))
print(response.output_parsed.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Waqas",
  "email": "waqas@g.c",
  "query": "I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.",
  "order_id": 12345,
  "purchase_date": "2025-12-31",
  "priority": "high",
  "category": "refund_request",
  "is_complaint": true,
  "tags": [
    "replacement",
    "damaged_item",
    "urgent"
  ]
}


In [None]:
# pip install pydantic-ai

In [None]:
# from platform import python_version
# print(python_version())

In [37]:
# Pydantic AI package for defining an agent and getting a structured response

from pydantic_ai import Agent # Agent framework by pydantic
import nest_asyncio # it is required to run pydantic AI in  jupyter notebook
nest_asyncio.apply()

agent = Agent(
    model="google-gla:gemini-2.0-flash", # openai:gpt-40,
    output_type=CustomerQuery
)

response = agent.run_sync(prompt)

In [38]:
# Print out the repsonse type and content
print(type(response.output))
print(response.output.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Waqas",
  "email": "waqas@g.c",
  "query": "I ordered a new computer monitor and it arrived with the screen cracked. This is the second time this has happened. I need a replacement ASAP.",
  "order_id": 12345,
  "purchase_date": "2025-12-31",
  "priority": "high",
  "category": "refund_request",
  "is_complaint": true,
  "tags": [
    "cracked screen",
    "replacement",
    "monitor"
  ]
}
