In [19]:
# Reference (OpenAI docs, text generation guide): https://platform.openai.com/docs/guides/text?api-mode=chat&lang=python

##### 1. Text and prompting

In [10]:
from openai import OpenAI
from pydantic_settings import BaseSettings, SettingsConfigDict
from transformers import AutoTokenizer

# Notebook configuration: `openai_api_key` is a required string setting, and
# the settings class is pointed at the local ".env" file as a source for it.
# (Kept as `#` comments rather than a class docstring so the model's generated
# schema is not given a description.)
class Settings(BaseSettings):
    openai_api_key: str
    model_config = SettingsConfigDict(env_file=".env")


# Build the settings once; later cells read `settings.openai_api_key`.
settings = Settings()

In [7]:
# Create the OpenAI API client, authenticating with the key held in `settings`.
client = OpenAI(api_key=settings.openai_api_key)

In [11]:
# Load the tokenizer published for "Qwen/Qwen2.5-0.5B". In this notebook it is
# only used to render messages through its chat template, not to run a model.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")

tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [15]:
# Render a single-turn conversation through the tokenizer's chat template to
# see the plain-text prompt that the role/content message list turns into.
messages = [
    {
        "role": "user",
        "content": "Write a one-sentence bedtime story about a unicorn.",
    }
]

formatted_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,  # return the rendered string instead of token ids
    # NOTE: `add_special_tokens` is not an `apply_chat_template` option; unknown
    # keyword arguments are only forwarded to the template as variables, so it
    # had no effect here and was removed. The flag that actually changes the
    # rendered text is `add_generation_prompt`: it is left at its default
    # (False) to keep the output identical; set it to True to also append the
    # header that opens the assistant's turn.
    add_generation_prompt=False,
)

print(formatted_text)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Write a one-sentence bedtime story about a unicorn.<|im_end|>



In [16]:
# Send the single user message to the Chat Completions endpoint and print only
# the text of the first returned choice.
completion = client.chat.completions.create(
    messages=[
        dict(
            role="user",
            content="Write a one-sentence bedtime story about a unicorn.",
        ),
    ],
    model="gpt-4.1-nano",
)

print(completion.choices[0].message.content)

Once upon a time, under a starlit sky, a gentle unicorn named Luna galloped through enchanted meadows, spreading dreams of kindness and magic with every shimmering step.


In [18]:
# Dump the whole response object (id, choices, model, usage, ...) as JSON
# indented by 4 spaces, to inspect everything returned besides the message text.
print(completion.model_dump_json(indent=4))

{
    "id": "chatcmpl-BNehsuOpNR56z07Saqoftbwn9JUGT",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "Once upon a time, under a starlit sky, a gentle unicorn named Luna galloped through enchanted meadows, spreading dreams of kindness and magic with every shimmering step.",
                "refusal": null,
                "role": "assistant",
                "annotations": [],
                "audio": null,
                "function_call": null,
                "tool_calls": null
            }
        }
    ],
    "created": 1744977720,
    "model": "gpt-4.1-nano-2025-04-14",
    "object": "chat.completion",
    "service_tier": "default",
    "system_fingerprint": "fp_c1fb89028d",
    "usage": {
        "completion_tokens": 36,
        "prompt_tokens": 18,
        "total_tokens": 54,
        "completion_tokens_details": {
            "accepted_prediction_tokens": 0,

##### 2. Structured Outputs

In [27]:
from enum import Enum

from pydantic import BaseModel, ConfigDict

In [37]:
# Prompt for the entity-extraction example: the sentence to analyse, the
# allowed entity tags, and the JSON shape the reply should follow. The lines
# are joined with "\n" and there is no trailing newline.
USER_MESSAGE = "\n".join(
    [
        'Extract the entities from the following sentence.:',
        '"hyerin park is a student at the university of california, berkeley."',
        'The entity tags are:',
        '- PERSON',
        '- LOCATION',
        '- ORGANIZATION',
        '',
        'Return in the following JSON format.',
        '{"entities": [{"entity": str, "tag": str}, ...]}',
    ]
)

# Closed set of tags an extracted entity may carry; the members match the tag
# names listed in USER_MESSAGE. Mixing in `str` makes each member a plain
# string equal to its name.
# (Documented with `#` comments on purpose: a class docstring could be picked
# up as a description in the generated JSON schema.)
class EntityTag(str, Enum):
    PERSON = "PERSON"
    LOCATION = "LOCATION"
    ORGANIZATION = "ORGANIZATION"

# One extracted entity: the matched text (`entity`) and its category (`tag`).
# (Documented with `#` comments rather than a class docstring so the generated
# JSON schema stays free of an added description.)
class Entity(BaseModel):
    # additionalProperties: false must always be set in objects
    # (https://platform.openai.com/docs/guides/structured-outputs#additionalproperties-false-must-always-be-set-in-objects)
    # `extra="forbid"` is what makes the generated schema carry that flag.
    # It is declared through `model_config` because the nested `class Config`
    # form is deprecated in Pydantic v2.
    model_config = ConfigDict(extra="forbid")

    entity: str
    tag: EntityTag

# Top-level response model: the list of entities extracted from the sentence.
# (Documented with `#` comments rather than a class docstring so the generated
# JSON schema stays free of an added description.)
class Entities(BaseModel):
    # `extra="forbid"` makes the generated schema set
    # `additionalProperties: false` on this object, which OpenAI Structured
    # Outputs requires. Declared through `model_config` because the nested
    # `class Config` form is deprecated in Pydantic v2.
    model_config = ConfigDict(extra="forbid")

    entities: list[Entity]

In [38]:
# Show the JSON schema generated for `Entities`, to check that both object
# definitions report 'additionalProperties': False.
Entities.model_json_schema()

{'$defs': {'Entity': {'additionalProperties': False,
   'properties': {'entity': {'title': 'Entity', 'type': 'string'},
    'tag': {'$ref': '#/$defs/EntityTag'}},
   'required': ['entity', 'tag'],
   'title': 'Entity',
   'type': 'object'},
  'EntityTag': {'enum': ['PERSON', 'LOCATION', 'ORGANIZATION'],
   'title': 'EntityTag',
   'type': 'string'}},
 'additionalProperties': False,
 'properties': {'entities': {'items': {'$ref': '#/$defs/Entity'},
   'title': 'Entities',
   'type': 'array'}},
 'required': ['entities'],
 'title': 'Entities',
 'type': 'object'}

In [35]:
# Ask for a reply constrained to the `Entities` model and print the parsed
# object that the SDK attaches to the first choice's message.
completion = client.beta.chat.completions.parse(
    response_format=Entities,
    messages=[dict(role="user", content=USER_MESSAGE)],
    model="gpt-4.1-nano",
)

print(completion.choices[0].message.parsed)

entities=[Entity(entity='hyerin park', tag=<EntityTag.PERSON: 'PERSON'>), Entity(entity='university of california, berkeley', tag=<EntityTag.ORGANIZATION: 'ORGANIZATION'>)]


In [30]:
# Keep the parsed object from the first choice and print it back out as JSON
# indented by 4 spaces.
result = completion.choices[0].message.parsed
print(result.model_dump_json(indent=4))

{
    "entities": [
        {
            "entity": "hyerin park",
            "tag": "PERSON"
        },
        {
            "entity": "university of california, berkeley",
            "tag": "ORGANIZATION"
        }
    ]
}
