# PydanticOutputParser

## import

In [1]:
from langchain.chat_models import ChatOpenAI

from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

from pydantic import BaseModel, Field, field_validator
from pydantic import validator     # pydatic v2 validator deprecated
from typing import List
import keyring

# Chat model shared by all of the examples below.
model_name = 'gpt-3.5-turbo'
# Fetch the API key from the system keyring (service 'openai', entry 'key_for_windows')
# so that the secret does not appear in the notebook itself.
OPENAI_API_KEY = keyring.get_password('openai', 'key_for_windows')
# NOTE(review): presumably 0.0 to keep completions as repeatable as possible — confirm
temperature=0.0
model = ChatOpenAI(model_name=model_name, temperature=temperature, openai_api_key=OPENAI_API_KEY)

  model = ChatOpenAI(model_name=model_name, temperature=temperature, openai_api_key=OPENAI_API_KEY)


## documentation example

In [2]:
# define your desired data structure
class Joke(BaseModel):
    # Structured form of a joke: a question (`setup`) and its answer (`punchline`).
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # you can add custom validation logic easily with Pydantic
    @field_validator('setup')
    @classmethod
    def question_ends_with_question_mark(cls, field):
        """Check that the joke setup is phrased as a question.

        Args:
            field: the already type-validated ``setup`` string.

        Returns:
            The unchanged ``setup`` value.

        Raises:
            ValueError: if the setup does not end with ``?`` (including when
                it is empty).
        """
        # endswith() also works on an empty string, where indexing field[-1]
        # would raise IndexError rather than the intended ValueError.
        if not field.endswith('?'):
            raise ValueError("Badly formed question!")
        return field

In [3]:
# and a query intended to prompt a language model to populate the data structure
joke_query = "Tell me a joke."

# set up a parser + inject instructions into the prompt template
parser = PydanticOutputParser(pydantic_object=Joke)

# {format_instructions} is pre-filled from the parser via partial_variables;
# only {query} is left to be supplied when the chain runs.
prompt_template = PromptTemplate(
    template="Answer the user query. \n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={'format_instructions':parser.get_format_instructions()}
)

chain = LLMChain(llm=model, prompt=prompt_template)

# run the LLMChain to get the AI-generated answer
output = chain.run(joke_query)

# convert the model's text answer into a Joke instance
parser.parse(output)

  chain = LLMChain(llm=model, prompt=prompt_template)
  output = chain.run(joke_query)


Joke(setup="Why couldn't the bicycle stand up by itself?", punchline='Because it was two tired!')

In [4]:
# another example with compound typed field.
class Actor(BaseModel):
    # An actor's name together with the films they starred in.
    name: str = Field(description="name of an actor")
    # list-valued field: the parsed output must provide a list of strings here
    film_names : List[str] = Field(description="list of names of films they starred in")
    
# query asking the model to fill in the Actor structure
actor_query = "Generate the filmography for a random actor."

# rebinds `parser` (previously the Joke parser) to one that targets Actor
parser = PydanticOutputParser(pydantic_object=Actor)

# {format_instructions} is pre-filled from the parser; {query} is supplied at run time
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=['query'],
    partial_variables={"format_instructions":parser.get_format_instructions()}
)

chain = LLMChain(llm=model, prompt=prompt_template)

# run the LLMChain to get the AI-generated answer
output = chain.run(actor_query)

# convert the model's text answer into an Actor instance
parser.parse(output)

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile'])

In [5]:
# example 3
# define your desired data structure
class Suggestions(BaseModel):
    # Substitute words proposed for a target word in a given context.
    words: List[str] = Field(description="list of substitute words based on context")

    # Throw error in case of receiving a numbered-list from API
    @field_validator('words')
    @classmethod
    def not_start_with_number(cls, field):
        """Reject the word list if any word begins with a numeric character.

        Args:
            field: the already type-validated list of suggested words.

        Returns:
            The unchanged list of words.

        Raises:
            ValueError: if any word starts with a numeric character, which
                indicates the model answered with a numbered list.
        """
        for item in field:
            # Look at the first character of every word; item[:1] is "" for an
            # empty string, so neither an empty word nor an empty list can
            # raise IndexError.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field
    
# parser that targets the Suggestions model defined above
parser = PydanticOutputParser(pydantic_object=Suggestions)

# prompt text with three placeholders: the parser's format instructions,
# the word to replace, and the sentence it appears in
template = """
Offer a list of suggestions to substitute the specified target_word based on the present context.
{format_instructions}
target_word={target_word}
context={context}
"""

# example inputs; later cells reuse these two variables
target_word = "behavior"
context = "The behavior of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson."

# {format_instructions} is pre-filled from the parser; the other two are supplied at run time
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

chain = LLMChain(llm=model, prompt=prompt_template)

# run the llmchain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context": context})
# convert the model's text answer into a Suggestions instance
parser.parse(output)

Suggestions(words=['conduct', 'actions', 'conducting oneself', 'manners', 'demeanor'])

## Multiple Outputs

In [8]:
# Define your desired data structure.
class Suggestions(BaseModel):
    # Substitute words for a target word, plus the reasoning for the suggestions.
    words: List[str] = Field(description="list of subtitute words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    # throw error in case of receiving a numbered-list from API
    @field_validator('words')
    @classmethod
    def not_start_with_number(cls, field):
        """Reject the word list if any word begins with a numeric character.

        Args:
            field: the already type-validated list of suggested words.

        Returns:
            The unchanged list of words.

        Raises:
            ValueError: if any word starts with a numeric character.
        """
        for item in field:
            # item[:1] is "" for an empty string, so an empty word cannot
            # raise IndexError the way item[0] would.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @field_validator('reasons')
    @classmethod
    def end_with_dot(cls, field):
        """Normalize every reason so that it ends with a period.

        Args:
            field: the already type-validated list of reasons.

        Returns:
            A new list in which each non-empty reason ends with ``.``; empty
            strings are passed through unchanged.
        """
        # Build a new list rather than mutating the input in place, and use
        # endswith() so an empty reason cannot raise IndexError.
        return [
            item if not item or item.endswith(".") else item + "."
            for item in field
        ]

In [9]:
# parser for the two-field Suggestions model (words + reasons)
parser = PydanticOutputParser(pydantic_object=Suggestions)

# prompt text asking for both the substitute words and the reasoning
template = """
Offer a list of suggestions to subtitute the specified target_word based the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

# {format_instructions} is pre-filled from the parser; the other two are supplied at run time
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions":parser.get_format_instructions()}
)

chain = LLMChain(llm=model, prompt=prompt_template)

# run the LLMChain to get the AI-generated answer
# (target_word and context are the values assigned in an earlier cell)
output = chain.run({"target_word":target_word, "context":context})

# convert the model's text answer into a Suggestions instance
parser.parse(output)

Suggestions(words=['conduct', 'actions', 'manners', 'conduct'], reasons=["The word 'conduct' can be used to describe how the teacher manages the lesson.", "The word 'actions' can be used to refer to the students' behavior.", "The word 'manners' can be used to describe the students' behavior in a more polite way.", "The word 'conduct' can be used to describe how the teacher manages the lesson."])

# CommaSeparatedListOutputParser

In [12]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser

# this parser needs no model class; the run below yields a plain list of strings
parser = CommaSeparatedListOutputParser()

# prompt text; the parser's format instructions are placed after the request
template = """
Offer a list of suggestions to subtitute the word '{target_word}' based on the presented the following text: {context}.
{format_instructions}
"""

# {format_instructions} is pre-filled from the parser; the other two are supplied at run time
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions":parser.get_format_instructions()}
)

In [13]:
# chain built from the comma-separated-list prompt defined in the previous cell
chain = LLMChain(llm=model, prompt=prompt_template)

# run the LLMChain to get the AI-generated answer
output = chain.run({"target_word":target_word, "context":context})

# convert the model's text answer into a Python list of strings
parser.parse(output)

['conduct', 'actions', 'demeanor', 'conduct', 'mannerisms']

## StructuredOutputParser

In [None]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# one ResponseSchema per expected output key: its name and a description
response_schemas = [
    ResponseSchema(name='words', description="A subtittue word based on context"),
    ResponseSchema(name="reasons", description="""the reasoning of why this word fits the context.""")
]

# build the parser from the schema list (this cell only constructs it; nothing is parsed here)
parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Fixing errors

## OutputFixingParser

In [16]:
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser

from pydantic import BaseModel, Field
from typing import List

# Define your desired data structure.
class Suggestions(BaseModel):
    # Two required list fields and no custom validators in this version.
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

# strict parser for Suggestions; used directly and as the base of the fixing parser below
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [17]:
# example: valid JSON, but the second key is "reasoning" instead of the
# required "reasons", so the strict parser raises OutputParserException
missformatted_output = '{"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}'
parser.parse(missformatted_output)

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}. Got: 1 validation error for Suggestions
reasons
  Field required [type=missing, input_value={'words': ['conduct', 'ma...particular situation.']}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/missing

In [18]:
# wrap the strict parser together with the chat model, then parse the same
# misformatted string through the wrapper instead of the strict parser
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['refers to the way someone acts in a particular situation.', 'refers to the way someone behaves in a particular situation.'])

In [22]:
# example the strict parser cannot handle: the JSON itself is malformed
# (no closing quote after "words") and the "reasons" field is absent
missformatted_output = '{"words: ["conduct", "manner"]}'
parser.parse(missformatted_output)

OutputParserException: Failed to parse Suggestions from completion {}. Got: 2 validation errors for Suggestions
words
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/missing
reasons
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/missing

In [23]:
# same wrapper as before, now applied to the malformed-JSON example
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['appropriate behavior', 'way of carrying oneself'])

## RetryOutputParser

In [24]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import RetryOutputParser

from pydantic import BaseModel, Field, field_validator
from typing import List

# define your desired data structure
class Suggestions(BaseModel):
    # Two required list fields and no custom validators in this version.
    words: List[str] = Field(description="list of subtitute words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")
    
# strict parser for Suggestions; used directly and as the base of the retry parser below
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [27]:
# prompt text asking for substitute words and the reasoning for each
template = """
Offer a list of suggestions to subtitute the specified target_word based on the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

# {format_instructions} is pre-filled from the parser; the other two are supplied at format time
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# fully formatted prompt, kept so it can be passed to the retry parser alongside the bad completion
model_input = prompt_template.format_prompt(target_word="behaviour", context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.")

In [28]:
# valid JSON with the required "reasons" field missing entirely,
# so the strict parser raises OutputParserException
missformatted_output = '{"words": ["conduct", "manner"]}'
parser.parse(missformatted_output)

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"]}. Got: 1 validation error for Suggestions
reasons
  Field required [type=missing, input_value={'words': ['conduct', 'manner']}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/missing

In [30]:
# wrap the strict parser together with the chat model; unlike the fixing parser's
# parse(), parse_with_prompt() also receives the original formatted prompt
retry_parser = RetryOutputParser.from_llm(parser=parser, llm=model)
retry_parser.parse_with_prompt(missformatted_output, model_input)

Suggestions(words=['conduct', 'manner'], reasons=["The word 'conduct' is a suitable substitute for 'behaviour' as it refers to the way in which something is carried out or managed, similar to how 'behaviour' describes the actions of individuals. The word 'manner' can also be used as a substitute as it pertains to the way in which something is done or the behavior of a person."])