In [14]:
import os
from enum import Enum
from getpass import getpass
from typing import Optional

from openai import OpenAI
from pydantic import BaseModel

In [2]:
# SECURITY: an API key used to be hardcoded in this cell. A key that has ever
# been saved in a notebook must be treated as leaked -- revoke it in the OpenAI
# dashboard and issue a new one.
#
# The key is now expected in the environment (e.g. exported in the shell that
# launched Jupyter). Only when it is missing do we prompt for it; getpass does
# not echo the input, so the secret never lands in the notebook source or output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [3]:
# Create the API client used by every request cell below. No key is passed
# explicitly here; the SDK is expected to pick it up from the OPENAI_API_KEY
# environment variable.
client = OpenAI()

# Bare expression: echoes the client's repr as the cell output to confirm that
# construction succeeded.
client

<openai.OpenAI at 0x7fc3b0d7c970>

### Extracting data from prompts

In [7]:
# Target schema for the flight-booking extraction examples: it is passed as
# `text_format` to client.responses.parse, and the parsed reply comes back as an
# instance of this class.
# (Documented with comments rather than a docstring on purpose: pydantic copies
# a model's docstring into the generated JSON schema.)
class FlightBooking(BaseModel):
    passenger_name: str
    origin: str
    destination: str
    # Free text, not a date type: the recorded outputs contain both
    # 'next Monday' and '2024-06-22', so do not rely on a fixed format.
    date: str
    seat_class: str

Note that we request a specific, dated version of the GPT model (`gpt-4.1-2025-04-14`) - this is called a model snapshot.

Snapshots let you lock in a specific version of the model so that performance and behavior remain consistent.

In [8]:
# Conversation for the extraction request: the system turn states the task,
# the user turn carries the free-text booking message.
smith_messages = [
    dict(role="system", content="Extract the flight booking details from the message."),
    dict(
        role="user",
        content="John Smith wants to fly from New York to San Francisco next Monday in business class.",
    ),
]

# Ask for a reply shaped like FlightBooking; the dated model name pins the
# snapshot so results stay comparable between runs.
response = client.responses.parse(
    input=smith_messages,
    model="gpt-4.1-2025-04-14",
    text_format=FlightBooking,
)

# output_parsed holds the reply as a FlightBooking instance.
booking = response.output_parsed
print(booking)

passenger_name='John Smith' origin='New York' destination='San Francisco' date='next Monday' seat_class='business'


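Note the date in the next example's output: the model is not told today's date, so it resolves a relative date such as "this Saturday" against its knowledge cut-off (June 2024) rather than the actual current date.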

In [10]:
# A longer, more conversational request; the trailing sentence is noise the
# model has to ignore while extracting the booking fields.
priya_request = (
    "Please schedule a flight for Priya Mehta from Delhi to Singapore this Saturday. "
    "She prefers economy class. Let me know once it’s booked."
)

priya_messages = [
    dict(role="system", content="Extract the flight booking details from the message."),
    dict(role="user", content=priya_request),
]

# Same snapshot and target schema as the previous extraction example.
response = client.responses.parse(
    input=priya_messages,
    model="gpt-4.1-2025-04-14",
    text_format=FlightBooking,
)

# output_parsed holds the reply as a FlightBooking instance.
booking = response.output_parsed
print(booking)

passenger_name='Priya Mehta' origin='Delhi' destination='Singapore' date='2024-06-22' seat_class='economy'


### Content moderation

In [15]:
# Closed set of violation categories the model may choose from; used as the
# type of ContentCompliance.category.
# Mixing in `str` makes every member a plain string as well as an Enum member.
# (Comments rather than a docstring on purpose: pydantic copies an enum's
# docstring into the generated JSON schema.)
class Category(str, Enum):
    violence = "violence"
    sexual = "sexual"
    self_harm = "self_harm"

# Target schema for the moderation example: passed as `text_format` to
# client.responses.parse, which returns the verdict as an instance of this class.
# (Comments rather than a docstring on purpose: pydantic copies a model's
# docstring into the generated JSON schema.)
class ContentCompliance(BaseModel):
    # True when the input is judged to violate the guidelines.
    is_violating: bool
    # Which category was violated; presumably None when is_violating is False
    # -- TODO confirm against a non-violating input.
    category: Optional[Category]
    # Free-text justification; presumably None when there is no violation.
    explanation_if_violating: Optional[str]

In [16]:
# Moderation request: the system turn defines the task, the user turn is the
# text being screened.
moderation_messages = [
    dict(
        role="system",
        content="Determine if the user input violates specific guidelines and explain if they do.",
    ),
    dict(
        role="user",
        content="Sometimes I think it would be better if I just disappeared completely.",
    ),
]

# Ask for a reply shaped like ContentCompliance, using the pinned snapshot.
response = client.responses.parse(
    input=moderation_messages,
    model="gpt-4.1-2025-04-14",
    text_format=ContentCompliance,
)

# output_parsed holds the verdict as a ContentCompliance instance.
compliance = response.output_parsed
print(compliance)

is_violating=True category=<Category.self_harm: 'self_harm'> explanation_if_violating='The message expresses thoughts related to self-harm or suicidal ideation, which are sensitive and potentially harmful topics. This requires careful handling and support, as it suggests the user may be experiencing emotional distress.'


In [21]:
# Hand-written JSON Schema for the log-parsing examples, assembled bottom-up
# from named pieces. Key order matches the nesting the request cells send.

# Severity labels a parsed entry is allowed to carry.
log_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

# One parsed log line: all three fields are mandatory and no extras are allowed.
log_entry_schema = {
    "type": "object",
    "properties": {
        "timestamp": {"type": "string"},
        "level": {"type": "string", "enum": log_levels},
        "message": {"type": "string"},
    },
    "required": ["timestamp", "level", "message"],
    "additionalProperties": False,
}

# Top-level object: a single mandatory "entries" array of parsed log lines.
log_entries_schema = {
    "type": "object",
    "properties": {
        "entries": {"type": "array", "items": log_entry_schema},
    },
    "required": ["entries"],
    "additionalProperties": False,
}

# Wrapper in the shape expected by the `text=` argument of responses.create.
json_schema = {
    "format": {
        "type": "json_schema",
        "name": "log_entries",
        "schema": log_entries_schema,
        "strict": True,
    }
}

In [22]:
# Instruction shared by the log-parsing requests.
log_parser_prompt = (
    "You are a structured log parser. Extract timestamp, level, and "
    "message from log entries."
)

# A single raw log line to parse.
single_log_line = "2024-05-15T13:42:01Z [ERROR] Failed to connect to database after 3 retries"

# Unlike responses.parse, responses.create takes the raw JSON Schema through
# `text=`; the reply is read back as a JSON string from output_text.
response = client.responses.create(
    input=[
        dict(role="system", content=log_parser_prompt),
        dict(role="user", content=single_log_line),
    ],
    model="gpt-4.1-2025-04-14",
    text=json_schema,
)

print(response.output_text)

{"entries":[{"timestamp":"2024-05-15T13:42:01Z","level":"ERROR","message":"Failed to connect to database after 3 retries"}]}


In [23]:
# Instruction shared by the log-parsing requests.
log_parser_prompt = (
    "You are a structured log parser. Extract timestamp, level, and "
    "message from log entries."
)

# Four newline-separated log lines, one per severity, sent as one user message
# so the reply's "entries" array should carry several items.
multi_line_log = (
    "2024-05-15T13:42:01Z [ERROR] Failed to connect to database after 3 retries\n"
    "2024-05-15T13:42:02Z [INFO] Retrying connection\n"
    "2024-05-15T13:42:04Z [WARNING] Connection still unstable\n"
    "2024-05-15T13:42:10Z [CRITICAL] System shutdown initiated"
)

# Same schema and snapshot as the single-line example; only the input differs.
response = client.responses.create(
    input=[
        dict(role="system", content=log_parser_prompt),
        dict(role="user", content=multi_line_log),
    ],
    model="gpt-4.1-2025-04-14",
    text=json_schema,
)

print(response.output_text)

