In [81]:
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers.datetime import DatetimeOutputParser
from langchain.output_parsers.boolean import BooleanOutputParser
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.exceptions import OutputParserException
from langchain.output_parsers import OutputFixingParser

In [2]:
from dotenv import load_dotenv
# Load variables from a local .env file before the client is built
# (presumably the OpenAI credentials; the file itself is not shown here).
load_dotenv()

# Single chat model shared by every example in this notebook.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

## Output Parsers

**String Parser**

In [3]:
# Raw model call: the result is a full AIMessage (content plus metadata).
llm.invoke(input="hello")

AIMessage(content='Hello! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 8, 'total_tokens': 18, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_13eed4fce1', 'finish_reason': 'stop', 'logprobs': None}, id='run-572bd3e3-e047-407a-a255-d1bbfa6e4d0d-0', usage_metadata={'input_tokens': 8, 'output_tokens': 10, 'total_tokens': 18})

In [4]:
# Parser used in the next cell to reduce the chat message to its plain-text content.
parser = StrOutputParser()

In [5]:
# Feed the model's reply straight into the string parser; only the text survives.
parser.invoke(llm.invoke("hello"))

'Hello! How can I assist you today?'

### Other Parsers

**Datetime**

In [6]:
# Unparsed call first, to show the timestamp arriving as message text.
llm.invoke(
    input=(
        "Output a random datetime in %Y-%m-%dT%H:%M:%S.%fZ. "
        "Don't say anything else"
    )
)

AIMessage(content='2023-10-05T14:23:45.123456Z', response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 33, 'total_tokens': 50, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_00428b782a', 'finish_reason': 'stop', 'logprobs': None}, id='run-167e8e20-8894-4817-9517-97c308dd63b3-0', usage_metadata={'input_tokens': 33, 'output_tokens': 17, 'total_tokens': 50})

In [109]:
# Rebinds `parser` (previously the StrOutputParser) to a datetime parser.
parser = DatetimeOutputParser()

In [110]:
# Build the prompt from the parser's own `format` attribute instead of repeating
# the strftime pattern by hand, so the text requested from the model always
# matches what the parser will try to parse.
# The recorded result is a naive datetime (no tzinfo), despite the trailing "Z".
parser.invoke(
    input=llm.invoke(
        f"Output a random datetime in {parser.format}. "
        "Don't say anything else"
    )
)

datetime.datetime(2023, 10, 5, 14, 23, 45, 123456)

**Boolean**

In [9]:
# Unparsed call first: the answer comes back as the text 'YES' inside an AIMessage.
llm.invoke(input="Are you an AI? YES or NO only")

AIMessage(content='YES', response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 16, 'total_tokens': 18, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_00428b782a', 'finish_reason': 'stop', 'logprobs': None}, id='run-77ccc891-f1ae-4af8-8401-743c3c433fa2-0', usage_metadata={'input_tokens': 16, 'output_tokens': 2, 'total_tokens': 18})

In [10]:
# Rebinds `parser` again, this time to the boolean parser used for the YES/NO prompts.
parser = BooleanOutputParser()

In [11]:
# A 'YES' reply is turned into the Python bool True.
parser.invoke(llm.invoke("Are you an AI? YES or NO only"))

True

In [12]:
# Opposite question, to show the parser producing False as well.
parser.invoke(llm.invoke("Are you Human? YES or NO only"))

False

## Structured Output

**Dict Schema**

In [31]:
from typing import Optional
from typing_extensions import Annotated, TypedDict

class UserInfo(TypedDict):
    """User's info."""
    # Each field is annotated as (type, "", text); the last entry reads as a
    # human-facing description of the field.
    # NOTE(review): LangChain appears to treat the middle entry as the field's
    # default value — confirm against its structured-output docs. In the recorded
    # run an unrelated prompt returned {}, so do not rely on "" being filled in.
    name: Annotated[str, "", "User's name"]
    country: Annotated[str, "", "Where the user lives"]


In [32]:
# Bind the TypedDict schema so replies come back as plain dicts.
llm_with_structure = llm.with_structured_output(schema=UserInfo)

In [33]:
# Both fields are stated outright in the prompt.
llm_with_structure.invoke(input="My name is Henrique, and I am from Brazil")

{'name': 'Henrique', 'country': 'Brazil'}

In [35]:
# Prompt with no user details at all; the recorded result is an empty dict.
llm_with_structure.invoke(input="The sky is blue")

{}

In [36]:
# Neither value is stated directly; the model has to infer both.
llm_with_structure.invoke(
    input=(
        "Hello, my name is the name capital of US. "
        "But I'm from a country where we usually associate with kangaroos"
    )
)

{'name': 'Washington', 'country': 'Australia'}

**Pydantic**

In [59]:
from pydantic import BaseModel, Field

class PydanticUserInfo(BaseModel):
    """User's info."""
    # Both fields default to "" so an instance can still be built when the
    # prompt carries no user details (the recorded run yields name='' and
    # country='' for an unrelated prompt).
    name: str = Field(default="", description="User's name")
    country: str = Field(default="", description="Where the user lives")

In [60]:
# Rebind the structured model to the Pydantic schema; replies become model instances.
llm_with_structure = llm.with_structured_output(schema=PydanticUserInfo)

In [61]:
# Same unrelated prompt as in the TypedDict section, now against the Pydantic schema.
structured_output = llm_with_structure.invoke(input="The sky is blue")

In [62]:
# Bare expression so the notebook renders the model's repr.
structured_output

PydanticUserInfo(name='', country='')

In [63]:
# Show the field as a bare expression rather than print(): the value here is an
# empty string, which print() renders as a blank line, whereas the repr ('')
# makes the empty default visible to the reader.
structured_output.name




In [64]:
# Show the field as a bare expression rather than print(): the value here is an
# empty string, which print() renders as a blank line, whereas the repr ('')
# makes the empty default visible to the reader.
structured_output.country




In [65]:
# Indirect prompt again, this time against the Pydantic schema.
structured_output = llm_with_structure.invoke(
    input=(
        "Hello, my name is the name capital of US. "
        "But I'm from a country where we usually associate with kangaroos"
    )
)

In [66]:
# Bare expression so the notebook renders the populated model's repr.
structured_output

PydanticUserInfo(name='Washington', country='Australia')

## Dealing with Errors

In [69]:
class Performer(BaseModel):
    """Filmography info about an actor/actress"""
    # Neither field declares a default, so both must be present for an
    # instance to validate.
    name: str = Field(description="name of an actor/actress")
    film_names: List[str] = Field(description="list of names of films they starred in")

In [70]:
# Rebind the structured model once more, now targeting the Performer schema.
llm_with_structure = llm.with_structured_output(schema=Performer)

In [86]:
# Keep the validated Performer instance; the cells below serialise and re-parse it.
response = llm_with_structure.invoke(
    input="Generate the filmography for Scarlett Johansson. Top 5 only"
)
response

Performer(name='Scarlett Johansson', film_names=['Lost in Translation', 'The Avengers', 'Her', 'Marriage Story', 'Black Widow'])

**Fixing Parser**

In [94]:
# Serialise the validated model to a JSON string.
# `.json()` is the deprecated Pydantic v1 spelling; `model_dump_json()` is the
# v2 method and produces the same compact JSON shown in the recorded output.
response.model_dump_json()

'{"name":"Scarlett Johansson","film_names":["Lost in Translation","The Avengers","Her","Marriage Story","Black Widow"]}'

In [95]:
# Rebinds `parser` to a Pydantic-backed parser targeting the Performer model.
parser = PydanticOutputParser(pydantic_object=Performer)

In [96]:
# Round-trip check: well-formed JSON produced from the model parses back into a
# Performer. Uses `model_dump_json()` because `.json()` is deprecated in
# Pydantic v2.
parser.parse(response.model_dump_json())

Performer(name='Scarlett Johansson', film_names=['Lost in Translation', 'The Avengers', 'Her', 'Marriage Story', 'Black Widow'])

In [105]:
# Python-dict-style text with single quotes: deliberately not valid JSON, used
# below to trigger a parser failure.
misformatted_result = (
    "{'name': 'Scarlett Johansson', "
    "'film_names': ['The Avengers']}"
)

In [106]:
# The strict parser is expected to reject the single-quoted string; catch the
# failure and print its message so the notebook keeps running.
try:
    parser.parse(misformatted_result)
except OutputParserException as parse_error:
    print(parse_error)

Invalid json output: {'name': 'Scarlett Johansson', 'film_names': ['The Avengers']}


In [107]:
# Wrap the strict Pydantic parser together with the chat model in a fixing parser.
new_parser = OutputFixingParser.from_llm(llm=llm, parser=parser)

In [108]:
# Retry the same malformed string through the LLM-backed fixing parser.
# NOTE(review): in the recorded run the result lists films that were not in the
# input string, so the repair step can alter content, not only formatting.
new_parser.parse(misformatted_result)

Performer(name='Scarlett Johansson', film_names=['The Avengers', 'Lost in Translation', 'Marriage Story', 'Black Widow'])