In [1]:
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.exceptions import OutputParserException
from langchain_classic.output_parsers.datetime import DatetimeOutputParser
from langchain_classic.output_parsers.boolean import BooleanOutputParser
from langchain_classic.output_parsers import OutputFixingParser
from dotenv import load_dotenv
import os

# Load environment variables (presumably from a local .env file) before reading them.
load_dotenv()

# Connection settings come from the environment; model choice and temperature are fixed here.
api_key = os.getenv("API_KEY")
base_url = os.getenv("OPENAI_ENDPOINT")
model_name = "gpt-4o-mini"
temp = 0.0

# Single chat-model client shared by the cells that follow.
llm = ChatOpenAI(model=model_name, temperature=temp, api_key=api_key, base_url=base_url)

## **LLM Output Parsing**

**String Parser**

In [2]:
# Raw call: the model returns a message object, so read its text via `.content`.
llm.invoke(input="hello").content

'Hello! How can I assist you today?'

In [4]:
# Passing the model's message through StrOutputParser yields the reply as a plain string.
parser = StrOutputParser()
parser.invoke(llm.invoke("hello"))

'Hello! How can I assist you today?'

**Datetime**

In [None]:
# Ask for a timestamp in a fixed strftime-style layout and show the raw text reply.
response = llm.invoke(
    "Output a random datetime in %Y-%m-%dT%H:%M:%S.%fZ. Don't say anything else"
)
print(response.content)

2023-10-05T14:23:45.123456Z


In [None]:
# Same prompt as the previous cell, but the reply is routed through the datetime parser.
parser = DatetimeOutputParser()
response = parser.invoke(
    llm.invoke("Output a random datetime in %Y-%m-%dT%H:%M:%S.%fZ. Don't say anything else")
)

# Show the parsed object's type first, then its value on the next line.
print(type(response), response, sep="\n")


<class 'datetime.datetime'>
2023-10-05 14:23:45.123456


**Boolean**

In [21]:
# Constrain the model to a YES/NO answer and show the raw text reply.
response = llm.invoke("Are you an AI? YES or NO only")
print(response.content)

YES


In [22]:
# Route the YES/NO reply through the boolean parser to get a Python bool.
parser = BooleanOutputParser()
parser.invoke(llm.invoke("Are you an AI? YES or NO only"))

True

In [None]:
# Opposite probe, using the boolean parser created in the previous cell.
parser.invoke(llm.invoke("Are you Human? YES or NO only"))

## **Structured LLM Output**
- LangChain's **llm.with_structured_output(schema)** returns a model wrapper that produces structured output conforming to the given schema
- The schema can be passed as a **TypedDict** class or a Pydantic **BaseModel** class
- Malformed structured output can be detected and repaired (see the self-healing parser section below)

#### Using TypedDict

In [None]:
from typing_extensions import Annotated, TypedDict

class UserInfo(TypedDict):
    """User's info."""
    # NOTE(review): the two extra Annotated entries are presumably read by LangChain
    # as (default value, description) — confirm against the structured-output docs.
    name: Annotated[str, "", "User's name. Defaults to ''"]
    country: Annotated[str, "", "Where the user lives. Defaults to ''"]

llm_with_structure = llm.with_structured_output(UserInfo)

# Three probes: explicit facts, no relevant facts, and facts that must be inferred.
for prompt in (
    "My name is Henrique, and I am from Brazil",
    "The sky is blue",
    (
        "Hello, my name is the same as the capital of the U.S.  "
        "But I'm from a country where we usually associate with kangaroos"
    ),
):
    response = llm_with_structure.invoke(prompt)
    print(response)


#### Using Pydantic BaseModel

In [None]:
from pydantic import BaseModel, Field

class PydanticUserInfo(BaseModel):
    """User's info."""
    # Both fields are declared as `str`, so the fallback must be a string as well.
    # The default is '' (not None) to agree with the field descriptions and with
    # the TypedDict variant of this schema; it is assigned outside Annotated.
    name: Annotated[str, Field(description="User's name. Defaults to ''")] = ""
    country: Annotated[str, Field(description="Where the user lives. Defaults to ''")] = ""

llm_with_structure = llm.with_structured_output(PydanticUserInfo)

# An input without any name/country information exercises the defaults.
structured_output = llm_with_structure.invoke("The sky is blue")

structured_output


In [None]:
# Reuses `llm_with_structure`, which the previous cell already bound to the
# PydanticUserInfo schema. Re-declaring the identical class and wrapper here
# only shadowed the earlier definitions.

structured_output = llm_with_structure.invoke("The sky is blue")

# The input carries no name/country information, so no meaningful values are expected here.
print(structured_output)

# Facts that must be inferred rather than read off the text.
structured_output = llm_with_structure.invoke(
    "Hello, my name is the same as the capital of the U.S.  "
    "But I'm from a country where we usually associate with kangaroos"
)

# The result is a Pydantic object, so fields are read as attributes.
print(structured_output.name)
print(structured_output.country)


## Fixing LLM Structured Output Errors with LLM Parsers (Self-healing)

In [None]:
class Performer(BaseModel):
    """Filmography info about an actor/actress"""
    # Both fields are required: Field() carries only a description, no default.
    name: str = Field(description="name of an actor/actress")
    film_names: List[str] = Field(description="list of names of films they starred in")

# Bind the schema to the model so replies come back as Performer instances.
llm_with_structure = llm.with_structured_output(Performer)

response = llm_with_structure.invoke("Generate the filmography for Scarlett Johansson. Top 5 only")
response

In [None]:
# Goal: turn JSON text back into a Performer object.

# A plain Pydantic parser is built around the target model class.
parser = PydanticOutputParser(pydantic_object=Performer)

# Two inputs: valid JSON dumped from the previous response, and a
# hand-written string that uses single quotes instead of JSON double quotes.
good_formatted_result = response.model_dump_json()
print(good_formatted_result)

misformatted_result = "{'name': 'Scarlett Johansson', 'film_names': ['The Avengers']}"
print(misformatted_result)

# The well-formed input parses cleanly.
print(parser.parse(good_formatted_result))

# The malformed input is expected to fail; show the parser's error instead of crashing.
try:
    parsed_bad = parser.parse(misformatted_result)
except OutputParserException as e:
    print(e)
else:
    print(parsed_bad)

In [None]:
# Wrap the plain Pydantic parser in an LLM-backed fixing parser, then retry the
# malformed string that the plain parser rejected.
smart_parser = OutputFixingParser.from_llm(llm=llm, parser=parser)

smart_parser.parse(misformatted_result)