---------------------------------
#### Return structured data from a model
---------------------------------

In [1]:
from langchain_openai import ChatOpenAI

In [2]:
# Chat model shared by the structured-output cells that follow.
# NOTE(review): no API key is passed here — presumably it is read from the
# environment (e.g. OPENAI_API_KEY); confirm before running on a fresh kernel.
llm = ChatOpenAI(model="gpt-4o-mini")

#### Using Pydantic

In [3]:
from typing import Optional

from pydantic import BaseModel, Field

In [4]:
class Joke(BaseModel):
    """Joke to tell user."""

    # The docstring above and the Field descriptions below are kept verbatim.
    # NOTE(review): they presumably become part of the schema shown to the
    # model, so treat them as prompt text rather than as comments — confirm.
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Optional score: when no rating is supplied the field is None.
    rating: Optional[int] = Field(
        default=None,
        description="How funny the joke is, from 1 to 10",
    )

In [5]:
# Bind the Joke schema to the chat model; invoking the result yields a Joke
# instance (see the displayed output of the next cell).
structured_llm = llm.with_structured_output(Joke)

In [6]:
# Ask for a joke and keep the parsed result; ending the cell on the bare
# name displays it.
response = structured_llm.invoke("Tell me a joke about cats")
response

Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=7)

In [7]:
# Convert the model to a plain dict with Pydantic's own serialiser.
# dict(response) only iterates the top-level fields and would leave any
# nested models unconverted; model_dump() handles those as well and gives the
# same result for this flat model.
response.model_dump()

{'setup': 'Why was the cat sitting on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!',
 'rating': 7}

Example

In [8]:
from typing import List
from pydantic import BaseModel, Field

In [9]:
class Recipe(BaseModel):
    """Recipe details."""

    # Field order and description text are kept exactly as written.
    # NOTE(review): the descriptions presumably feed the schema given to the
    # model — confirm before rewording them.
    title: str = Field(description="The title of the recipe")
    ingredients: List[str] = Field(description="List of ingredients needed")
    cooking_time: int = Field(description="Cooking time in minutes")
    steps: List[str] = Field(description="Step-by-step instructions")

In [10]:
# Rebind structured_llm to the Recipe schema; this replaces the Joke-bound
# runnable created earlier in the notebook.
structured_llm = llm.with_structured_output(Recipe)

In [11]:
# Request a recipe and show it as a plain dict. model_dump() is Pydantic's
# serialiser; unlike dict(...) it would also convert nested models.
structured_llm.invoke("Give me a recipe for chocolate chip cookies.").model_dump()

{'title': 'Chocolate Chip Cookies',
 'ingredients': ['2 1/4 cups all-purpose flour',
  '1/2 teaspoon baking soda',
  '1 cup unsalted butter, room temperature',
  '1/2 cup granulated sugar',
  '1 cup packed brown sugar',
  '1 teaspoon salt',
  '2 teaspoons pure vanilla extract',
  '2 large eggs',
  '2 cups semisweet chocolate chips',
  '1 cup chopped nuts (optional)'],
 'cooking_time': 15,
 'steps': ['Preheat your oven to 350°F (175°C).',
  'In a small bowl, stir together the flour and baking soda; set aside.',
  'In a large bowl, using a mixer, cream together the butter, granulated sugar, brown sugar, and salt until smooth.',
  'Beat in the vanilla extract and eggs one at a time, mixing well after each addition.',
  'Gradually blend in the flour mixture until fully incorporated.',
  'Stir in the chocolate chips and nuts (if using).',
  'Drop by rounded tablespoon onto ungreased baking sheets.',
  'Bake for 10 to 12 minutes in the preheated oven, or until edges are nicely golden.',
  'R

**JSON schema**

In [12]:
# The same joke schema written as a raw JSON-schema dict instead of a
# Pydantic class. Only "setup" and "punchline" are required; "rating" is
# optional and declares a default of None.
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {"type": "string", "description": "The setup of the joke"},
        "punchline": {"type": "string", "description": "The punchline to the joke"},
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}

In [13]:
# Rebind structured_llm once more, this time to the raw JSON-schema dict
# rather than a Pydantic class.
structured_llm = llm.with_structured_output(json_schema)

In [14]:
# With a dict schema the displayed result is a plain dict, not a model
# instance.
structured_llm.invoke("Tell me a joke about cats")

{'setup': 'Why did the cat sit on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!',
 'rating': 8}

#### Few shot prompting

In [15]:
from langchain_core.prompts import ChatPromptTemplate

In [16]:
# System prompt that carries the few-shot examples as plain text.
# The doubled braces ({{ ... }}) keep the example JSON literal when the string
# is rendered through a prompt template — presumably ChatPromptTemplate,
# imported in the previous cell.
# NOTE(review): no visible cell consumes `system`; confirm where it is used.
# The trailing backslash on the first line joins it to the next line without
# inserting a newline.
system = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:

example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}"""


#### Using Parsers with Custom Validation Logic

In [17]:
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate

from langchain_openai import OpenAI
from pydantic import BaseModel, Field, model_validator

In [18]:
# Separate OpenAI model instance used by the output-parser example below
# (it is piped after the prompt as `prompt | model`).
model = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.0)

In [19]:
# Define your desired data structure.
class Joke(BaseModel):
    # NOTE: this redefines the `Joke` model from the structured-output section
    # earlier in the notebook; once this cell runs, `Joke` has no `rating`.
    # No class docstring is added on purpose.
    # NOTE(review): a docstring would presumably be emitted as the schema
    # description inside the parser's format instructions — confirm.
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # add custom validation logic with Pydantic.
    @model_validator(mode="before")
    @classmethod
    def question_ends_with_question_mark(cls, values: dict) -> dict:
        """Reject a joke whose setup does not end with a question mark.

        Runs before field validation, so ``values`` is the raw, unvalidated
        input rather than a populated model.

        Args:
            values: Raw input for the model, normally a dict of field values.

        Returns:
            ``values`` unchanged, so that normal field validation proceeds.

        Raises:
            ValueError: If ``setup`` is a non-empty string that does not end
                with "?".
        """
        # A "before" validator may be handed something other than a dict;
        # pass it through untouched so Pydantic's own validation reports it
        # instead of failing here with AttributeError on `.get`.
        if not isinstance(values, dict):
            return values

        setup = values.get("setup")  # Accessing the setup Field in values
        # Only string setups are judged here. A missing or wrongly typed
        # setup is left for field validation, which raises a proper
        # ValidationError rather than a TypeError from indexing.
        if isinstance(setup, str) and setup and not setup.endswith("?"):
            raise ValueError("Badly formed question!")
        return values

In [20]:
# Parser bound to the Joke model: it supplies the format instructions that go
# into the prompt and later turns the model's text output into a Joke.
parser = PydanticOutputParser(pydantic_object=Joke)

In [21]:
# Single-turn prompt. The parser's format instructions are bound up front as
# a partial variable, so callers only need to supply {query}.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [22]:
# Pipe the prompt into the model; parsing is done as a separate step below.
prompt_and_model = prompt | model

# Raw model output for the query, not yet parsed.
output = prompt_and_model.invoke({"query": "Tell me a joke."})

# Parse the raw output into a Joke instance (displayed as the cell result).
parser.invoke(output)

Joke(setup='Why did the tomato turn red?', punchline='Because it saw the salad dressing!')

Another example

In [23]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

In [24]:
class Person(BaseModel):
    """Information about a person."""

    # Field(...) marks each field as required; the description text is kept
    # verbatim.
    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ...,
        description="The height of the person expressed in meters.",
    )

- `name` is a string field that will store the name of the person.
- `Field(...)`: The ellipsis (...) here means this field is **required**. Pydantic will raise an error if this field is missing when creating an instance of Person.
- `description="The name of the person"`: Adds a description that helps clarify what data this field represents, which can be useful for documentation or autogenerated API schemas.

In [25]:
class People(BaseModel):
    """Identifying information about all people in a text."""

    # One Person entry per individual; each element is validated against the
    # Person model.
    people: List[Person]

In [26]:
# Set up a parser for the People schema (this rebinds `parser`, which was
# previously bound to Joke).
parser = PydanticOutputParser(pydantic_object=People)

In [27]:
# Chat prompt: the system message carries the parser's format instructions
# (bound via .partial), and the human message carries the user's {query}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the user query. Wrap the output in `json` tags\n{format_instructions}"),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())