# LLM Structured Outputs

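This notebook shows three ways to describe the schema passed to `llm.with_structured_output(...)`:

1. A Pydantic model
2. A `TypedDict`
3. A JSON Schema dict (`json_schema`)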


In [2]:
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

# Chat model shared by every structured-output example in this notebook.
llm = ChatOpenAI(
    model="gpt-4o",
)

# Pydantic schema for the first example. The class docstring and every Field
# description show up in the function schema attached to the model, so the
# strings are part of what the LLM sees, not just documentation.
class Country(BaseModel):
    """ Information about a country """

    name: str = Field(
        description="name of the country",
    )
    language: str = Field(
        description="language of the country",
    )
    capital: str = Field(
        description="Capital of the country",
    )

# Attach the Country schema to the model; the result is a runnable, not a response.
structured_llm = llm.with_structured_output(schema=Country)

# Bare last expression: display the runnable's repr to inspect what was bound.
structured_llm


RunnableBinding(bound=ChatOpenAI(profile={'max_input_tokens': 128000, 'max_output_tokens': 16384, 'image_inputs': True, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True, 'structured_output': True, 'image_url_inputs': True, 'pdf_inputs': True, 'pdf_tool_message': True, 'image_tool_message': True, 'tool_choice': True}, client=<openai.resources.chat.completions.completions.Completions object at 0x10adc5710>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x1112e3590>, root_client=<openai.OpenAI object at 0x1107d5750>, root_async_client=<openai.AsyncOpenAI object at 0x1112e30d0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True), kwargs={'response_format': <class '__main__.Country'>, 'ls_structured_output_format': {'kwargs': {'method': 'json_schema', 'strict': None}, 'schema': {'type': 'fu

In [3]:
# With the Pydantic schema bound, the recorded result is a Country instance.
country_prompt = "Tell me about France"
structured_llm.invoke(country_prompt)

Country(name='France', language='French', capital='Paris')

In [None]:
# NOTE: reference material, not runnable code.
#
# This is the repr of `structured_llm` from the Pydantic `Country` example,
# re-indented by hand so the nesting is readable. It is kept as comments because
# the raw repr (`<... object at 0x...>`, `<class '...'>`) is not valid Python and
# made this cell fail with a SyntaxError on "Restart & Run All".
#
# Reading guide:
#   * Step 1 is a RunnableBinding around the ChatOpenAI model. The schema is
#     attached through the binding's `kwargs` ('response_format' and
#     'ls_structured_output_format'), not through the model's own arguments.
#   * Step 2 (after the `|`) is a RunnableBinding around a RunnableLambda whose
#     custom_output_type is the Country class.
#
# RunnableBinding(
#     bound=ChatOpenAI(
#         profile={
#             'max_input_tokens': 128000,
#             'max_output_tokens': 16384,
#             'image_inputs': True,
#             'audio_inputs': False,
#             'video_inputs': False,
#             'image_outputs': False,
#             'audio_outputs': False,
#             'video_outputs': False,
#             'reasoning_output': False,
#             'tool_calling': True,
#             'structured_output': True,
#             'image_url_inputs': True,
#             'pdf_inputs': True,
#             'pdf_tool_message': True,
#             'image_tool_message': True,
#             'tool_choice': True
#         },
#         client=<openai.resources.chat.completions.completions.Completions object at 0x10adc5710>,
#         async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x1112e3590>,
#         root_client=<openai.OpenAI object at 0x1107d5750>,
#         root_async_client=<openai.AsyncOpenAI object at 0x1112e30d0>,
#         model_name='gpt-4o',
#         model_kwargs={},
#         openai_api_key=SecretStr('**********'),
#         stream_usage=True
#     ),
#     kwargs={
#         'response_format': <class '__main__.Country'>,
#         'ls_structured_output_format': {
#             'kwargs': {'method': 'json_schema', 'strict': None},
#             'schema': {
#                 'type': 'function',
#                 'function': {
#                     'name': 'Country',
#                     'description': 'Information about a country ',
#                     'parameters': {
#                         'properties': {
#                             'name': {'description': 'name of the country', 'type': 'string'},
#                             'language': {'description': 'language of the country', 'type': 'string'},
#                             'capital': {'description': 'Capital of the country', 'type': 'string'}
#                         },
#                         'required': ['name', 'language', 'capital'],
#                         'type': 'object'
#                     }
#                 }
#             }
#         }
#     },
#     config={},
#     config_factories=[]
# )
# | RunnableBinding(
#     bound=RunnableLambda(...),
#     kwargs={},
#     config={},
#     config_factories=[],
#     custom_output_type=<class '__main__.Country'>
# )

In [4]:
from typing_extensions import Annotated, TypedDict
from typing import Optional


# TypedDict
class Joke(TypedDict):
    """Joke to tell user."""

    # Annotated metadata layout used here: (type, default, description).
    # `...` in the default slot marks the field as having no default.
    #
    # Other ways `setup` could have been declared:
    #   setup: str                    -> no default, no description
    #   setup: Annotated[str, ...]    -> no default, no description
    #   setup: Annotated[str, "foo"]  -> default, no description
    setup: Annotated[
        str,
        ...,
        "The setup of the joke",
    ]
    punchline: Annotated[
        str,
        ...,
        "The punchline of the joke",
    ]
    # The only field with a default (None), hence the Optional type.
    rating: Annotated[
        Optional[int],
        None,
        "How funny the joke is, from 1 to 10",
    ]


# Rebind `structured_llm` to the TypedDict schema; the recorded result of the
# call below is a plain dict with the Joke keys.
structured_llm = llm.with_structured_output(schema=Joke)

joke_prompt = "Tell me a joke about cats"
structured_llm.invoke(joke_prompt)


{'setup': 'Why are cats good at video games?',
 'punchline': 'Because they have nine lives!',
 'rating': 7}

In [5]:
# Per-field definitions of the joke payload, written as JSON Schema.
joke_properties = {
    "setup": {"type": "string", "description": "The setup of the joke"},
    "punchline": {"type": "string", "description": "The punchline to the joke"},
    # "rating" is absent from "required" below and carries a None default.
    "rating": {
        "type": "integer",
        "description": "How funny the joke is, from 1 to 10",
        "default": None,
    },
}

# Top-level schema: a plain dict, no Python class involved.
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": joke_properties,
    "required": ["setup", "punchline"],
}
# Rebind `structured_llm` to the raw JSON Schema dict; the recorded result of
# the call below is a plain dict.
structured_llm = llm.with_structured_output(schema=json_schema)

joke_prompt = "Tell me a joke about cats"
structured_llm.invoke(joke_prompt)

{'setup': "Why don't cats ever play poker in the jungle?",
 'punchline': 'Too many cheetahs!',
 'rating': 7}