In [5]:
# References:
# - Announcement: https://openai.com/index/introducing-structured-outputs-in-the-api/
# - Guide: https://platform.openai.com/docs/guides/structured-outputs/introduction

In [6]:
# %pip install --upgrade openai
# %pip install python-dotenv

In [4]:
from dotenv import load_dotenv
# Load settings from a local .env file before any API client is created.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

In [7]:
from pydantic import BaseModel
from openai import OpenAI

# No API key is passed explicitly; presumably the client picks it up from the
# environment prepared by load_dotenv() in an earlier cell — confirm.
client = OpenAI()

# Target schema for the extraction: the fields the reply must contain.
# NOTE(review): documented with `#` comments on purpose; a class docstring
# would likely end up in the schema description sent to the API — confirm
# before adding one.
class CalendarEvent(BaseModel):
    name: str
    date: str  # free-form text, not a parsed date (the sample output is "Friday")
    participants: list[str]

# Passing the model class as `response_format` requests a reply that follows
# the CalendarEvent schema.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
    ],
    response_format=CalendarEvent,
)

# Parsed result of the first choice; the next cell treats it as a CalendarEvent.
event = completion.choices[0].message.parsed

In [18]:
import json

# Convert the parsed model instance into a plain dictionary.
# `model_dump()` is the Pydantic v2 replacement for the deprecated `.dict()`.
event_dict = event.model_dump()

# Serialize the dictionary to a JSON string
json_string = json.dumps(event_dict, indent=4)

# Print the JSON string
print(json_string)

{
    "name": "Science Fair",
    "date": "Friday",
    "participants": [
        "Alice",
        "Bob"
    ]
}


JSON is one of the most widely used formats in the world for applications to exchange data.

Structured Outputs is a feature that ensures the model will always generate responses that adhere to your supplied JSON Schema, so you don't need to worry about the model omitting a required key, or hallucinating an invalid enum value.

Some benefits of Structured Outputs include:

1. Reliable type-safety: No need to validate or retry incorrectly formatted responses
2. Explicit refusals: Safety-based model refusals are now programmatically detectable
3. Simpler prompting: No need for strongly worded prompts to achieve consistent formatting

Some more examples:

### Chain of thought
You can ask the model to output an answer in a structured, step-by-step way, to guide the user through the solution.

In [29]:
from pydantic import BaseModel
from openai import OpenAI

# A fresh client is created in this cell, so it does not depend on the
# `client` object from an earlier cell.
client = OpenAI()

# One step of the worked solution: the reasoning text and the resulting expression.
class Step(BaseModel):
    explanation: str
    output: str

# Full reply schema: an ordered list of steps followed by the final answer.
class MathReasoning(BaseModel):
    steps: list[Step]
    final_answer: str

# Passing the model class as `response_format` requests a reply that follows
# the MathReasoning schema.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step."},
        {"role": "user", "content": "how can I solve 8x + 7 = -23"}
    ],
    response_format=MathReasoning,
)

# Parsed result of the first choice; later cells read `.steps` and `.final_answer` from it.
math_reasoning = completion.choices[0].message.parsed

In [30]:
import json

# Dump the parsed MathReasoning (including its nested Step items) to a plain
# dictionary. `model_dump()` is the Pydantic v2 replacement for the
# deprecated `.dict()`.
math_reasoning_dict = math_reasoning.model_dump()
json_string = json.dumps(math_reasoning_dict, indent=4)
print(json_string)

{
    "steps": [
        {
            "explanation": "To solve the equation 8x + 7 = -23, we need to isolate the variable x on one side of the equation. The first step is to eliminate the constant term from the left side. We can do this by subtracting 7 from both sides of the equation.",
            "output": "8x + 7 - 7 = -23 - 7"
        },
        {
            "explanation": "Subtracting 7 from both sides simplifies the equation. On the left side, 7 - 7 equals 0, so the equation becomes 8x = -30.",
            "output": "8x = -30"
        },
        {
            "explanation": "Now, to solve for x, we need to get x by itself. Since x is currently multiplied by 8, we can divide both sides of the equation by 8 to isolate x.",
            "output": "8x / 8 = -30 / 8"
        },
        {
            "explanation": "Dividing both sides of the equation by 8 simplifies to x = -30/8. We can simplify the fraction by dividing the numerator and the denominator by their greatest common divi

In [31]:
# Show the refusal text when the model declined to answer; otherwise show the
# parsed MathReasoning object.
math_response = completion.choices[0].message
print(math_response.refusal or math_response.parsed)

steps=[Step(explanation='To solve the equation 8x + 7 = -23, we need to isolate the variable x on one side of the equation. The first step is to eliminate the constant term from the left side. We can do this by subtracting 7 from both sides of the equation.', output='8x + 7 - 7 = -23 - 7'), Step(explanation='Subtracting 7 from both sides simplifies the equation. On the left side, 7 - 7 equals 0, so the equation becomes 8x = -30.', output='8x = -30'), Step(explanation='Now, to solve for x, we need to get x by itself. Since x is currently multiplied by 8, we can divide both sides of the equation by 8 to isolate x.', output='8x / 8 = -30 / 8'), Step(explanation='Dividing both sides of the equation by 8 simplifies to x = -30/8. We can simplify the fraction by dividing the numerator and the denominator by their greatest common divisor, which is 2.', output='x = -15/4'), Step(explanation='The fraction -15/4 cannot be simplified further, so x = -15/4 is the final solution to the equation.', o

In [32]:
# Bind the parsed model to `math_reasoning` instead of rebinding
# `math_response`, which the previous cell uses for the raw message object;
# one name should not stand for two different kinds of object.
math_reasoning = completion.choices[0].message.parsed
print(math_reasoning.steps)
print(math_reasoning.final_answer)

[Step(explanation='To solve the equation 8x + 7 = -23, we need to isolate the variable x on one side of the equation. The first step is to eliminate the constant term from the left side. We can do this by subtracting 7 from both sides of the equation.', output='8x + 7 - 7 = -23 - 7'), Step(explanation='Subtracting 7 from both sides simplifies the equation. On the left side, 7 - 7 equals 0, so the equation becomes 8x = -30.', output='8x = -30'), Step(explanation='Now, to solve for x, we need to get x by itself. Since x is currently multiplied by 8, we can divide both sides of the equation by 8 to isolate x.', output='8x / 8 = -30 / 8'), Step(explanation='Dividing both sides of the equation by 8 simplifies to x = -30/8. We can simplify the fraction by dividing the numerator and the denominator by their greatest common divisor, which is 2.', output='x = -15/4'), Step(explanation='The fraction -15/4 cannot be simplified further, so x = -15/4 is the final solution to the equation.', output=

### Structured data extraction
You can define structured fields to extract from unstructured input data, such as research papers.

In [21]:
from pydantic import BaseModel
from openai import OpenAI

# A fresh client is created in this cell, so it does not depend on the
# `client` object from an earlier cell.
client = OpenAI()

# Fields to pull out of the paper text.
class ResearchPaperExtraction(BaseModel):
    title: str
    authors: list[str]
    abstract: str
    keywords: list[str]

# NOTE(review): the user message below is the literal placeholder "...", so no
# paper text is actually sent. Replace it with real paper content; otherwise
# the fields in the reply are not extracted from any source document.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are an expert at structured data extraction. You will be given unstructured text from a research paper and should convert it into the given structure."},
        {"role": "user", "content": "..."}
    ],
    response_format=ResearchPaperExtraction,
)

# Parsed result of the first choice; the next cell dumps it as JSON.
research_paper = completion.choices[0].message.parsed

In [23]:
import json

# Dump the parsed ResearchPaperExtraction to a plain dictionary.
# `model_dump()` is the Pydantic v2 replacement for the deprecated `.dict()`.
research_paper_dict = research_paper.model_dump()
json_string = json.dumps(research_paper_dict, indent=4)
print(json_string)

{
    "title": "The Impact of Quantum Computing on Artificial Intelligence",
    "authors": [
        "Jane Doe",
        "John Smith"
    ],
    "abstract": "Quantum computing promises to revolutionize artificial intelligence by enhancing computational speed and efficiency. This paper explores the intersection of these two cutting-edge fields, outlining how quantum algorithms can optimize AI processes and presenting recent research findings in quantum-enhanced machine learning. We also discuss the potential challenges and ethical considerations surrounding the integration of quantum technologies into AI systems.",
    "keywords": [
        "Quantum Computing",
        "Artificial Intelligence",
        "Machine Learning",
        "Quantum Algorithms",
        "Ethics"
    ]
}


### UI Generation
You can generate valid HTML by representing it as recursive data structures with constraints, like enums.

In [24]:
from enum import Enum
from typing import List
from pydantic import BaseModel
from openai import OpenAI

# A fresh client is created in this cell, so it does not depend on the
# `client` object from an earlier cell.
client = OpenAI()

# Closed set of element kinds a UI node may have. Mixing in `str` makes each
# member a string as well as an enum member.
class UIType(str, Enum):
    div = "div"
    button = "button"
    header = "header"
    section = "section"
    field = "field"
    form = "form"

# A single name/value pair attached to a UI node (both kept as strings).
class Attribute(BaseModel):
    name: str
    value: str

# Recursive tree node: every node has a type, a label, child nodes and attributes.
class UI(BaseModel):
    type: UIType
    label: str
    children: List["UI"]  # forward reference to the class being defined
    attributes: List[Attribute]

UI.model_rebuild() # This is required to enable recursive types

# Top-level wrapper: the reply is an object with a single `ui` root node.
class Response(BaseModel):
    ui: UI

# Passing the model class as `response_format` requests a reply that follows
# the Response schema.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are a UI generator AI. Convert the user input into a UI."},
        {"role": "user", "content": "Make a User Profile Form"}
    ],
    response_format=Response,
)

# Despite its name, `ui` holds the parsed Response wrapper (the root node is
# under its `ui` field), as the captured output `ui=UI(...)` shows.
ui = completion.choices[0].message.parsed

ui=UI(type=<UIType.form: 'form'>, label='User Profile Form', children=[UI(type=<UIType.field: 'field'>, label='Full Name', children=[], attributes=[Attribute(name='type', value='text'), Attribute(name='placeholder', value='Enter your full name'), Attribute(name='required', value='true')]), UI(type=<UIType.field: 'field'>, label='Email Address', children=[], attributes=[Attribute(name='type', value='email'), Attribute(name='placeholder', value='Enter your email'), Attribute(name='required', value='true')]), UI(type=<UIType.field: 'field'>, label='Username', children=[], attributes=[Attribute(name='type', value='text'), Attribute(name='placeholder', value='Choose a username'), Attribute(name='required', value='true')]), UI(type=<UIType.field: 'field'>, label='Password', children=[], attributes=[Attribute(name='type', value='password'), Attribute(name='placeholder', value='Create a password'), Attribute(name='required', value='true')]), UI(type=<UIType.field: 'field'>, label='Date of Birt

In [25]:
import json

# Dump the parsed Response (and its nested UI tree) to a plain dictionary.
# `model_dump()` is the Pydantic v2 replacement for the deprecated `.dict()`;
# mode="json" turns UIType enum members into their plain string values, so
# serialization does not depend on the enum's `str` mixin.
ui_dict = ui.model_dump(mode="json")
json_string = json.dumps(ui_dict, indent=4)
print(json_string)

{
    "ui": {
        "type": "form",
        "label": "User Profile Form",
        "children": [
            {
                "type": "field",
                "label": "Full Name",
                "children": [],
                "attributes": [
                    {
                        "name": "type",
                        "value": "text"
                    },
                    {
                        "name": "placeholder",
                        "value": "Enter your full name"
                    },
                    {
                        "name": "required",
                        "value": "true"
                    }
                ]
            },
            {
                "type": "field",
                "label": "Email Address",
                "children": [],
                "attributes": [
                    {
                        "name": "type",
                        "value": "email"
                    },
                    {
                  

### Moderation
You can classify inputs on multiple categories, which is a common way of doing moderation.

In [26]:
from enum import Enum
from typing import Optional
from pydantic import BaseModel
from openai import OpenAI

# A fresh client is created in this cell, so it does not depend on the
# `client` object from an earlier cell.
client = OpenAI()

# Violation categories the classifier may report.
class Category(str, Enum):
    violence = "violence"
    sexual = "sexual"
    self_harm = "self_harm"

# Moderation verdict for a single input.
class ContentCompliance(BaseModel):
    is_violating: bool
    # The two fields below may be None; the sample output shows null for both
    # when the input is compliant.
    category: Optional[Category]
    explanation_if_violating: Optional[str]

# Passing the model class as `response_format` requests a reply that follows
# the ContentCompliance schema.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "Determine if the user input violates specific guidelines and explain if they do."},
        {"role": "user", "content": "How do I prepare for a job interview?"}
    ],
    response_format=ContentCompliance,
)

# Parsed result of the first choice; the next cell dumps it as JSON.
compliance = completion.choices[0].message.parsed

In [27]:
import json

# Dump the parsed ContentCompliance to a plain dictionary.
# `model_dump()` is the Pydantic v2 replacement for the deprecated `.dict()`;
# mode="json" turns a Category enum member (when present) into its plain
# string value before it reaches json.dumps.
compliance_dict = compliance.model_dump(mode="json")
json_string = json.dumps(compliance_dict, indent=4)
print(json_string)

{
    "is_violating": false,
    "category": null,
    "explanation_if_violating": null
}
