### Getting structured output from text

In [1]:
from openai import OpenAI
from pydantic import BaseModel

# API client shared by every request cell in this notebook.
# NOTE(review): no key is passed here, so credentials presumably come from the
# environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

# Target schema for the event-extraction example; it is handed to the parse
# call as `text_format`.
# NOTE(review): "Calender" is a misspelling of "Calendar"; the name is kept
# because the request cell refers to it.
class CalenderEvent(BaseModel):
    name: str
    date: str  # plain string, not a date type
    participants: list[str]
    

In [2]:
# Send one system + one user message and request a reply shaped like
# CalenderEvent (passed as the target text format).
event_messages = [
    {"role": "system", "content": "Extract the event information"},
    {"role": "user", "content": "James and Alvin are going to a react event on Friday"},
]
response = client.responses.parse(
    model="gpt-4o-mini",
    input=event_messages,
    text_format=CalenderEvent,
)

In [33]:
# Show the parsed result as JSON indented by 4 spaces.
# NOTE(review): `response` is reassigned by later cells, and the output recorded
# below is a research-paper extraction rather than a calendar event — looks like
# this cell was last run out of order; re-run it right after the request cell.
print(response.output_parsed.model_dump_json(indent=4))

{
    "title": "A Deep Learning Approach for Predicting Crop Yields",
    "authors": [
        "John Doe",
        "Jane Smith",
        "Raj Patel"
    ],
    "abstract": "Predicting crop yield is a critical task in agriculture that affects food supply chains and economic planning. This study proposes a deep learning-based model utilizing satellite imagery and historical weather data to predict yields of major crops. Our model outperforms traditional statistical methods in accuracy and scalability.",
    "keywords": [
        "Deep Learning",
        "Crop Yield",
        "Agriculture",
        "Satellite Imagery",
        "Weather Data"
    ]
}


## Chain of Thought
You can ask the model to output an answer in a structured, step-by-step way to guide the user through the solution.

In [7]:
# One step of a worked solution: a prose explanation plus the resulting expression.
class Step(BaseModel):
    explanation: str
    output: str
    
# Complete structured answer: the ordered list of steps and the final answer.
class MathReasoning(BaseModel):
    steps: list[Step]
    final_answer: str
    
# Ask for a step-by-step solution shaped like MathReasoning.
tutor_messages = [
    {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step"},
    {"role": "user", "content": "How to solve 8x + 7 = -23"},
]
response = client.responses.parse(
    model="gpt-4o-mini",
    input=tutor_messages,
    text_format=MathReasoning,
)

# Keep the parsed object under its own name; the display cell prints it.
math_reasoning = response.output_parsed

In [38]:
# Pretty-print the parsed steps and final answer as JSON (2-space indent).
# NOTE(review): `math_reasoning` is rebound to a chat message object in the
# refusal section further down; run this cell before that one.
print(math_reasoning.model_dump_json(indent=2))

{
  "steps": [
    {
      "explanation": "Start with the equation: 8x + 7 = -23",
      "output": "8x + 7 = -23"
    },
    {
      "explanation": "Subtract 7 from both sides to isolate the term with x.",
      "output": "8x = -23 - 7"
    },
    {
      "explanation": "Calculate the right side: -23 - 7 = -30.",
      "output": "8x = -30"
    },
    {
      "explanation": "Now, divide both sides by 8 to solve for x.",
      "output": "x = -30 / 8"
    },
    {
      "explanation": "Simplify -30 / 8. Dividing both numerator and denominator by 2 gives -15 / 4.",
      "output": "x = -15 / 4"
    }
  ],
  "final_answer": "x = -3.75 or x = -15/4"
}


## Structured Data Extraction
You can define structured fields to extract structured data from unstructured input, such as a research paper.

In [20]:
# Sample input: free-form paper text with a title line, an author line,
# an "Abstract:" paragraph and a "Keywords:" line.
unstructured_paper = """
A Deep Learning Approach for Predicting Crop Yields

John Doe, Jane Smith, Raj Patel

Abstract: Predicting crop yield is a critical task in agriculture that affects food supply chains and economic planning. This study proposes a deep learning-based model utilizing satellite imagery and historical weather data to predict yields of major crops. Our model outperforms traditional statistical methods in accuracy and scalability.

Keywords: Deep Learning, Crop Yield, Agriculture, Satellite Imagery, Weather Data
"""

# Fields to pull out of the paper text; used as the `text_format` of the
# extraction request.
class ResearchPaperExtraction(BaseModel):
    title: str
    authors: list[str]
    abstract: str
    keywords: list[str]
    
# Convert the raw paper text into a ResearchPaperExtraction-shaped reply.
# NOTE(review): the system prompt reads as truncated ("...into the given
# structured"); it is reproduced verbatim here.
extraction_messages = [
    {"role": "system", "content": "You are an expert data extraction expert. You will be given unstructured text from a research paper and should convert it into the given structured"},
    {"role": "user", "content": unstructured_paper},
]
response = client.responses.parse(
    model="gpt-4o-mini",
    input=extraction_messages,
    text_format=ResearchPaperExtraction,
)

# Parsed result, kept under a dedicated name for the display cell.
research_paper_extraction = response.output_parsed

In [32]:
# Pretty-print the extracted paper fields as JSON (4-space indent).
print(research_paper_extraction.model_dump_json(indent=4))

{
    "title": "A Deep Learning Approach for Predicting Crop Yields",
    "authors": [
        "John Doe",
        "Jane Smith",
        "Raj Patel"
    ],
    "abstract": "Predicting crop yield is a critical task in agriculture that affects food supply chains and economic planning. This study proposes a deep learning-based model utilizing satellite imagery and historical weather data to predict yields of major crops. Our model outperforms traditional statistical methods in accuracy and scalability.",
    "keywords": [
        "Deep Learning",
        "Crop Yield",
        "Agriculture",
        "Satellite Imagery",
        "Weather Data"
    ]
}


## UI Generation
You can generate valid HTML by representing it as a recursive data structure with constraints, like enums

In [39]:
from enum import Enum
from typing import List

# Closed set of element kinds a UI node may have (string-valued enum).
class UIType(str, Enum):
    div = "div"
    button = "button"
    header = "header"
    section = "section"
    field = "field"
    form = "form"
    
# A single name/value pair attached to a UI node; both sides are strings.
class Attribute(BaseModel):
    name: str
    value: str
    
# Recursive UI node: a type, a label, nested child nodes and a list of attributes.
class UI(BaseModel):
    type: UIType
    label: str
    children: List["UI"]  # forward reference to this same class
    attributes: List[Attribute]
    
# This is required to enable recursive types. The method must actually be
# called: a bare `UI.model_rebuild` only looks the attribute up and does nothing.
UI.model_rebuild()

# Top-level wrapper: the reply is an object with a single `ui` root node.
class Response(BaseModel):
    ui: UI

# Ask the model for a user profile form, shaped like Response.
ui_messages = [
    {"role": "system", "content": "You are a UI generator AI. Convert the user input into a UI"},
    {"role": "user", "content": "Make a user profile form"},
]
response = client.responses.parse(
    model="gpt-4o-mini",
    input=ui_messages,
    text_format=Response,
)

# Parsed result, kept under a dedicated name for the display cell.
ui_response = response.output_parsed

In [40]:
# Pretty-print the generated UI tree as JSON (2-space indent).
print(ui_response.model_dump_json(indent=2))

{
  "ui": {
    "type": "form",
    "label": "User Profile Form",
    "children": [
      {
        "type": "field",
        "label": "First Name",
        "children": [],
        "attributes": [
          {
            "name": "placeholder",
            "value": "Enter your first name"
          },
          {
            "name": "required",
            "value": "true"
          }
        ]
      },
      {
        "type": "field",
        "label": "Last Name",
        "children": [],
        "attributes": [
          {
            "name": "placeholder",
            "value": "Enter your last name"
          },
          {
            "name": "required",
            "value": "true"
          }
        ]
      },
      {
        "type": "field",
        "label": "Email",
        "children": [],
        "attributes": [
          {
            "name": "placeholder",
            "value": "Enter your email"
          },
          {
            "name": "required",
            "value": "true"

## Moderation
You can classify input into multiple categories, which is a common way of doing moderation.

In [43]:
from typing import Optional

# Violation categories the classifier may report (string-valued enum).
class Category(str, Enum):
    violence =  "violence"
    sexual =  "sexual"
    self_harm =  "self_harm"
    
    
# Moderation verdict for a single user input.
class ContentCompliance(BaseModel):
    is_violating: bool
    # Both fields below accept None (Optional); presumably None is used when
    # is_violating is False — confirm.
    category: Optional[Category]
    explanation_if_violating: Optional[str]
    
    
# Classify a single user input against the ContentCompliance schema.
moderation_messages = [
    {"role": "system", "content": "Determine if the user input violates specific guidelines and explain if they do."},
    {"role": "user", "content": "How to make a gun"},
]
response = client.responses.parse(
    model="gpt-4o-mini",
    input=moderation_messages,
    text_format=ContentCompliance,
)

# Display the verdict as JSON (2-space indent).
verdict = response.output_parsed
print(verdict.model_dump_json(indent=2))

{
  "is_violating": true,
  "category": "violence",
  "explanation_if_violating": "The request involves instructions for creating a firearm, which is illegal and poses significant risks to safety."
}


## Refusal with structured outputs

In [48]:
# Step and MathReasoning are reused from the chain-of-thought cell earlier in
# this notebook; redefining identical classes here only shadowed those
# definitions.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step."},
        {"role": "user", "content": "how can I solve 8x + 7 = -23"}
    ],
    response_format=MathReasoning,
)

# NOTE: this rebinds `math_reasoning` to the chat message of the first choice;
# the code below reads its `.refusal` and `.parsed` attributes.
math_reasoning = completion.choices[0].message

# If the model refuses to respond, you will get a refusal message
if math_reasoning.refusal:
    print(math_reasoning.refusal)
else:
    print(math_reasoning.parsed)

steps=[Step(explanation='The equation given is 8x + 7 = -23. To solve for x, we need to first isolate the term with x on one side. We start by eliminating the constant term on the left side, which is 7.', output='8x + 7 = -23'), Step(explanation='Subtract 7 from both sides of the equation to remove the constant term on the left side. This gives us 8x + 7 - 7 = -23 - 7.', output='8x = -30'), Step(explanation='Now, we have 8x = -30. To solve for x, divide both sides of the equation by 8, the coefficient of x, to isolate x.', output='x = -30 / 8'), Step(explanation='When you divide both sides by 8, you get x = -30/8. Simplify the fraction by dividing the numerator and the denominator by their greatest common divisor, which is 2.', output='x = -15/4'), Step(explanation='After simplifying, we find that x = -15/4 or as a decimal, x = -3.75. This is the solution to the equation.', output='x = -15/4 or x = -3.75')] final_answer='x = -15/4 or x = -3.75'


## Streaming

In [56]:
# Schema for the streaming example: three lists of strings to be extracted
# from the input sentence.
class EntitiesModel(BaseModel):
    attributes: List[str]
    colors: List[str]
    animals: List[str]
    
# Stream the structured extraction: echo text/refusal fragments as they
# arrive, then print the fully parsed result once the stream has finished.
with client.responses.stream(
    model="gpt-4o-mini",
    input=[
        {
            "role": "system",
            "content": "Extract entities from the input",
        },
        {
            "role": "user",
            "content": "The quick brown fox jumps over the lazy dog with piercing blue eyes",
        }
    ], 
    text_format=EntitiesModel
) as stream:
    for event in stream:
        if event.type == "response.refusal.delta":
            print(event.delta, end="")
        elif event.type == "response.output_text.delta":
            print(event.delta, end="")
        elif event.type in ("error", "response.error"):
            # NOTE(review): the Responses streaming error event is documented
            # with type "error" and a `message` field, so the original
            # "response.error" / `event.error` check would never fire. The old
            # spelling is kept as a fallback.
            print(getattr(event, "message", None) or getattr(event, "error", event), end="")
        elif event.type == "response.completed":
            print("\n")
            
    # Must be read while the stream context is still open.
    final_response = stream.get_final_response()
    print(final_response.output_parsed.model_dump_json(indent=2))

{"attributes":["quick","brown","lazy","piercing","blue"],"colors":["brown","blue"],"animals":["fox","dog"]}

{
  "attributes": [
    "quick",
    "brown",
    "lazy",
    "piercing",
    "blue"
  ],
  "colors": [
    "brown",
    "blue"
  ],
  "animals": [
    "fox",
    "dog"
  ]
}
