In [4]:
!pip install langchain-core langgraph>0.2.27

In [5]:
!pip install -qU langchain-openai

In [6]:
import getpass
import os

os.environ["OPENAI_API_KEY"] = "sk-"

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [10]:
print(llm.invoke("Tell me a joke"))

content='Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 11, 'total_tokens': 28, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None} id='run-4d67d633-f880-405c-848d-3de4006f7e31-0' usage_metadata={'input_tokens': 11, 'output_tokens': 17, 'total_tokens': 28, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}}


Using Pydantic Class 

1. Pydantic automatically validates the data, ensuring that it conforms to the expected types and constraints.

2. The key advantage of using Pydantic is that the model-generated output will be validated. Pydantic will raise an error if any required fields are missing or if any fields are of the wrong type.


In [43]:
from pydantic import BaseModel
from typing import List, Optional

class User(BaseModel):
    id: int
    name: str
    age: Optional[int] = None  # Optional field, can be None
    skills: List[str]

# Valid data
# user = User(id=1, name="Praveen", age=25, skills=["Python", "Django"])
# print(user)

# Invalid data (this will raise a validation error)
# invalid_user = User(id="abc", name="John", age="twenty", skills=[123])


In [11]:
from typing import Optional
from pydantic import BaseModel, Field

class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )

structured_llm = llm.with_structured_output(Joke)

structured_llm.invoke("Tell me a Joke")

Joke(setup='Why did the scarecrow win an award?', punchline='Because he was outstanding in his field!', rating=8)

TypeDict: 

If you don't want to use Pydantic, explicitly don't want validation of the arguments, you can define your schema using a TypedDict class. 

You need to define a dictionary structure with specific keys and types for use with static type checkers (like MyPy).Runtime validation is not required, and you just want to ensure type correctness during development.

In [44]:
from typing import TypedDict

class Person(TypedDict):
    name: str
    age: int

person: Person = {"name": "John", "age": 30}  # Correct

person: Person = {"name": "John", "age": "Hello"}  # Incorrect but, No error


to pass metadata in TypedDict, we will use Annotated

In [13]:
from typing_extensions import Annotated, TypedDict

class Joke(TypedDict):
    """Joke to tell user."""
    setup: Annotated[str, ..., "The setup of the joke"]
    punchline: Annotated[str, ..., "The punchline of the joke"]
    rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]

structured_llm = llm.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")


{'setup': 'Why was the cat sitting on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!',
 'rating': 7}

JSON Schema :


In [19]:
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}
structured_llm = llm.with_structured_output(json_schema)

structured_llm.invoke("Tell me a joke about cats")

{'setup': 'Why was the cat sitting on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!',
 'rating': 7}

Few Shot Prompting 

In [20]:
from langchain_core.prompts import ChatPromptTemplate

system = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:

example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}"""

prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{input}")])

few_shot_structured_llm = prompt | structured_llm
few_shot_structured_llm.invoke("what's something funny about woodpeckers")

{'setup': 'Woodpecker',
 'punchline': "Woodpecker knockin' on trees, just trying to find a good 'bark' to join the party!",
 'rating': 7}

#Using method argument

In [21]:
structured_llm = llm.with_structured_output(None, method="json_mode")

structured_llm.invoke(
    "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
)

{'setup': 'Why did the cat sit on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!'}

Not all models support .with_structured_output(), since not all models have tool calling or JSON mode support. For such models you'll need to directly prompt the model to use a specific format, and use an output parser to extract the structured response from the raw model output.

In [37]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""
    people: List[Person]


# Set up a parser
parser = PydanticOutputParser(pydantic_object=People)


In [40]:
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Person": {"description": "Information about a person.", "properties": {"name": {"description": "The name of the person", "title": "Name", "type": "string"}, "height_in_meters": {"description": "The height of the person expressed in meters.", "title": "Height In Meters", "type": "number"}}, "required": ["name", "height_in_meters"], "title": "Person", "type": "object"}}, "description": "Identifying information about all people in a text.", "properties": {"people": {"items": {"$ref": "#/$defs/Person"}, "title": "People", "type": "arra

In [39]:

# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Wrap the output in `json` tags\n{format_instructions}",
        ),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

In [25]:
query = "Praveen Reddy is 29 years old and he is 6 feet tall"

chain = prompt | llm | parser

chain.invoke({"query": query})

People(people=[Person(name='Praveen Reddy', height_in_meters=1.8288)])

#Custom Parsing

In [45]:
import json
import re
from typing import List

from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    people: List[Person]



In [46]:
People.schema

<bound method BaseModel.schema of <class '__main__.People'>>

In [47]:

# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that  "
            "matches the given schema: \`\`\`json\n{schema}\n\`\`\`. "
            "Make sure to wrap the answer in \`\`\`json and \`\`\` tags",
        ),
        ("human", "{query}"),
    ]
).partial(schema=People.schema())



  "matches the given schema: \`\`\`json\n{schema}\n\`\`\`. "
  "Make sure to wrap the answer in \`\`\`json and \`\`\` tags",


In [48]:
print(prompt)

input_variables=['query'] input_types={} partial_variables={'schema': {'$defs': {'Person': {'description': 'Information about a person.', 'properties': {'name': {'description': 'The name of the person', 'title': 'Name', 'type': 'string'}, 'height_in_meters': {'description': 'The height of the person expressed in meters.', 'title': 'Height In Meters', 'type': 'number'}}, 'required': ['name', 'height_in_meters'], 'title': 'Person', 'type': 'object'}}, 'description': 'Identifying information about all people in a text.', 'properties': {'people': {'items': {'$ref': '#/$defs/Person'}, 'title': 'People', 'type': 'array'}}, 'required': ['people'], 'title': 'People', 'type': 'object'}} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['schema'], input_types={}, partial_variables={}, template='Answer the user query. Output your answer as JSON that  matches the given schema: \\`\\`\\`json\n{schema}\n\\`\\`\\`. Make sure to wrap the answer in \\`\\`\\`json and \\`\\`\\`

In [49]:

# Custom parser
def extract_json(message: AIMessage) -> List[dict]:
    """Extracts JSON content from a string where JSON is embedded between \`\`\`json and \`\`\` tags.

    Parameters:
        text (str): The text containing the JSON content.

    Returns:
        list: A list of extracted JSON strings.
    """
    text = message.content
    # Define the regular expression pattern to match JSON blocks
    pattern = r"\`\`\`json(.*?)\`\`\`"

    # Find all non-overlapping matches of the pattern in the string
    matches = re.findall(pattern, text, re.DOTALL)

    # Return the list of matched JSON strings, stripping any leading or trailing whitespace
    try:
        return [json.loads(match.strip()) for match in matches]
    except Exception:
        raise ValueError(f"Failed to parse: {message}")

  """Extracts JSON content from a string where JSON is embedded between \`\`\`json and \`\`\` tags.


In [50]:
query = "Anna is 23 years old and she is 6 feet tall"

print(prompt.format_prompt(query=query).to_string())

System: Answer the user query. Output your answer as JSON that  matches the given schema: \`\`\`json
{'$defs': {'Person': {'description': 'Information about a person.', 'properties': {'name': {'description': 'The name of the person', 'title': 'Name', 'type': 'string'}, 'height_in_meters': {'description': 'The height of the person expressed in meters.', 'title': 'Height In Meters', 'type': 'number'}}, 'required': ['name', 'height_in_meters'], 'title': 'Person', 'type': 'object'}}, 'description': 'Identifying information about all people in a text.', 'properties': {'people': {'items': {'$ref': '#/$defs/Person'}, 'title': 'People', 'type': 'array'}}, 'required': ['people'], 'title': 'People', 'type': 'object'}
\`\`\`. Make sure to wrap the answer in \`\`\`json and \`\`\` tags
Human: Anna is 23 years old and she is 6 feet tall


In [51]:
chain = prompt | llm | extract_json

chain.invoke({"query": query})



[{'people': [{'name': 'Anna', 'height_in_meters': 1.8288}]}]