#### Return structured output

In [1]:
from langchain_openai import ChatOpenAI

In [2]:
# Chat model reused by the structured-output cells and by the final parser chain.
llm = ChatOpenAI(model="gpt-4o-mini")

In [3]:
from typing import Optional

from pydantic import BaseModel, Field

In [5]:
# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    # The class docstring and each `description` below end up in the schema
    # generated for this model, so their wording is kept exactly as-is.

    # Required text fields.
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")

    # Optional score; stays None when no rating is supplied.
    rating: Optional[int] = Field(
        default=None,
        description="How funny the joke is, from 1 to 10",
    )

In [8]:
# Bind the `Joke` schema to the chat model; invoking it yields a `Joke` instance.
structured_llm = llm.with_structured_output(Joke)

In [9]:
# Keep the parsed result in `response` so the next cell can convert it.
response = structured_llm.invoke("Tell me a joke about LLM")
# Bare last expression: the notebook renders the object's repr.
response

Joke(setup='Why did the LLM go to therapy?', punchline='Because it had too many unresolved parameters!', rating=None)

In [10]:
# `model_dump()` is the Pydantic v2 way to get a plain dict. Unlike `dict(model)`
# it also converts nested models recursively; for this flat model the result
# is the same mapping of field name -> value.
response.model_dump()

{'setup': 'Why did the LLM go to therapy?',
 'punchline': 'Because it had too many unresolved parameters!',
 'rating': None}

Example: a schema with list-valued fields

In [11]:
from typing import List

In [12]:
# Pydantic
class Recipe(BaseModel):
    """Recipe details."""

    # All four fields are required (no defaults). The docstring and the
    # `description` strings are part of the generated schema, so they are
    # reproduced verbatim.
    title: str = Field(description="The title of the recipe")
    ingredients: List[str] = Field(description="List of ingredients needed")
    cooking_time: int = Field(description="Cooking time in minutes")
    steps: List[str] = Field(description="Step-by-step instructions")

In [11]:
# Using structured output with the model
# Rebinds `structured_llm`: from here on it targets the `Recipe` schema.
structured_llm = llm.with_structured_output(Recipe)

In [13]:
# Convert the returned `Recipe` with Pydantic v2's `model_dump()` rather than
# `dict(...)`: same output for this model, and it would also recurse into
# nested models if the schema ever grows any.
structured_llm.invoke("Give me a recipe for chocolate chip cookies.").model_dump()

{'title': 'Chocolate Chip Cookies',
 'ingredients': ['2 1/4 cups all-purpose flour',
  '1/2 teaspoon baking soda',
  '1 cup unsalted butter, room temperature',
  '1/2 cup granulated sugar',
  '1 cup packed light-brown sugar',
  '1 teaspoon salt',
  '2 teaspoons pure vanilla extract',
  '2 large eggs',
  '2 cups semisweet and/or milk chocolate chips',
  '1 cup chopped nuts (optional)'],
 'cooking_time': 15,
 'steps': ['Preheat your oven to 350°F (175°C).',
  'In a small bowl, whisk together the flour and baking soda; set aside.',
  'In a large bowl, using a hand mixer or stand mixer, cream the butter and sugars together on medium speed until light and fluffy, about 2 minutes.',
  'Add the salt, vanilla, and eggs; beat until well combined.',
  'Gradually add the flour mixture, mixing until just combined.',
  'Fold in the chocolate chips and nuts (if using).',
  'Drop tablespoon-sized balls of dough onto ungreased baking sheets, spacing them about 2 inches apart.',
  'Bake for 10-12 minut

JSON Schema

In [13]:
# Hand-written JSON Schema describing a joke object with the same three
# properties (`setup`, `punchline`, `rating`) as the Pydantic `Joke` model.
json_schema = {
    "title":       "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        # `rating` is not listed under "required", so it is optional.
        # NOTE(review): the default is None while the declared type is
        # "integer" - confirm this mismatch is intended.
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}

In [14]:
# Rebinds `structured_llm` again, this time from a schema dict instead of a class.
structured_llm = llm.with_structured_output(json_schema)

In [15]:
# With a dict schema the result is displayed as a plain dict (see output).
structured_llm.invoke("Tell me a joke about cats")

{'setup': 'Why was the cat sitting on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!',
 'rating': 7}

#### Output parsers

In [16]:
from langchain_core.output_parsers import PydanticOutputParser

from langchain_core.prompts import PromptTemplate

from langchain_openai import OpenAI
from pydantic import BaseModel, Field, model_validator

In [17]:
# Separate instruct model for the first output-parser example (distinct from `llm`).
model = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.0)

In [18]:
# Define your desired data structure.
class Joke(BaseModel):
    # NOTE: this re-definition shadows the earlier `Joke` (the one with
    # `rating`). No class docstring is added on purpose: it would be copied
    # into the JSON schema used for the parser's format instructions.
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # Custom validation logic with Pydantic.
    @model_validator(mode="before")
    @classmethod
    def question_ends_with_question_mark(cls, values: object) -> object:
        """Reject a joke whose ``setup`` does not end with a question mark.

        Runs before field validation, so ``values`` is the raw input and is
        not guaranteed to be a dict, nor ``setup`` a string. Anything this
        check does not understand is passed through untouched so that
        Pydantic's own field validation reports it.

        Args:
            values: Raw input handed to the model (normally a dict).

        Returns:
            The input, unchanged.

        Raises:
            ValueError: If ``setup`` is a non-empty string that does not end
                with ``"?"``.
        """
        if isinstance(values, dict):
            setup = values.get("setup")
            # Only inspect non-empty strings; missing/empty/non-string values
            # are left for the regular field validators.
            if isinstance(setup, str) and setup and not setup.endswith("?"):
                raise ValueError("Badly formed question!")
        return values

In [19]:
# Parser targeting the most recent `Joke` definition (setup + punchline only).
parser = PydanticOutputParser(pydantic_object=Joke)

In [21]:
# Prompt with two placeholders: `format_instructions` is filled in once from
# the parser via `partial_variables`; `query` is supplied at invoke time.
prompt = PromptTemplate(
    template         = "Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables  = ["query"],
    partial_variables= {"format_instructions": parser.get_format_instructions()},
)

In [22]:
# Pipe the prompt into the model, without the parser yet.
prompt_and_model = prompt | model

# Model output for the formatted prompt.
output = prompt_and_model.invoke({"query": "Tell me a joke."})

# Parse the output separately; the cell displays the resulting `Joke`.
parser.invoke(output)

Joke(setup='Why did the tomato turn red?', punchline='Because it saw the salad dressing!')

Another example: parsing a list of people with a chat prompt

In [23]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

In [24]:
class Person(BaseModel):
    """Information about a person."""

    # Both fields are required (`...` marks "no default"). The docstring and
    # descriptions appear in the generated schema, so they are kept verbatim.
    name: str = Field(
        ...,
        description="The name of the person",
    )
    height_in_meters: float = Field(
        ...,
        description="The height of the person expressed in meters.",
    )

In [25]:
class People(BaseModel):
    """Identifying information about all people in a text."""

    # Container field: a list of `Person` models.
    people: List[Person]

In [27]:
# Rebinds `parser`: it now targets the `People` model.
parser = PydanticOutputParser(pydantic_object=People)

In [28]:
# Rebinds `prompt` as a two-message chat template. `.partial(...)` pre-fills
# `format_instructions` from the parser, leaving `query` as the only
# variable to supply at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Wrap the output in `json` tags\n{format_instructions}",
        ),
        (
            "human", "{query}"
        ),
    ]
).partial(format_instructions = parser.get_format_instructions())

In [29]:
query = "Anna is 23 years old and she is 6 feet tall"

# Preview the fully rendered prompt. The variable is passed by name, matching
# the `chain.invoke({"query": query})` call used later, instead of relying on
# a bare string being mapped implicitly onto the single template variable.
print(prompt.invoke({"query": query}).to_string())

System: Answer the user query. Wrap the output in `json` tags
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Person": {"description": "Information about a person.", "properties": {"name": {"description": "The name of the person", "title": "Name", "type": "string"}, "height_in_meters": {"description": "The height of the person expressed in meters.", "title": "Height In Meters", "type": "number"}}, "required": ["name", "height_in_meters"], "title": "Person", "type": "object"}}, "description": "Identifying information about all people in a text.", "properties": {"people": {"items"

In [30]:
# Full pipeline: chat prompt -> chat model (`llm`) -> `People` parser.
chain = prompt | llm | parser

# The cell displays the parsed `People` instance.
chain.invoke({"query": query})

People(people=[Person(name='Anna', height_in_meters=1.8288)])