In [1]:
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment.
load_dotenv()

# os.environ only accepts str values, so copying a missing key's None back into
# it would fail with an opaque TypeError. Check explicitly and fail with an
# actionable message instead.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# PydanticOutputParser

## Imports

In [2]:
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

In [3]:
from pydantic import BaseModel, Field, validator
from typing import List

In [4]:
# Completion model used by the cells below; the temperature is set to 0.0.
model_name, temperature = 'gpt-3.5-turbo-instruct', 0.0
model = OpenAI(temperature=temperature, model_name=model_name)

## Documentation Example

In [5]:
# Define your desired data structure.
# (Documented with comments rather than a class docstring: pydantic copies a
# model's docstring into its schema, which would alter the format instructions.)
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator('setup')
    def question_ends_with_question_mark(cls, value):
        """Require the joke setup to end with a question mark.

        The parameter is called ``value`` rather than ``field`` because
        pydantic reserves ``field`` as a validator argument name.

        Args:
            value: the candidate ``setup`` string.

        Returns:
            The unchanged string when it ends with '?'.

        Raises:
            ValueError: if the string does not end with '?', including when it
                is empty.
        """
        # endswith() copes with the empty string; indexing with [-1] would
        # raise IndexError there instead of a validation error.
        if not value.endswith('?'):
            raise ValueError("Badly formed question!")
        return value

In [6]:
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

In [7]:
# Set up a parser bound to the Joke model; its format instructions are injected
# into the prompt template in the next cell.
parser = PydanticOutputParser(pydantic_object=Joke)

In [8]:
# {format_instructions} is pre-filled from the parser, so {query} is the only
# variable left to supply when the prompt is rendered.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Render the prompt for the joke query.
_input = prompt.format_prompt(query=joke_query)

In [13]:
# Send the rendered prompt text to the model and keep the raw completion.
output = model.invoke(_input.to_string())

In [14]:
# Parse the completion into a Joke instance (displayed as the cell result).
parser.parse(output)

Joke(setup='Why did the tomato turn red?', punchline='Because it saw the salad dressing!')

## Another example

In [19]:
# Define your desired data structure.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, value):
        """Reject the list when any word begins with a numeric character.

        Args:
            value: the candidate list of words.

        Returns:
            The unchanged list when no word starts with a number.

        Raises:
            ValueError: if at least one word starts with a numeric character.
        """
        # item[:1] is '' for an empty string, so a blank entry is simply not
        # numeric; item[0] would raise IndexError instead.
        if any(item[:1].isnumeric() for item in value):
            raise ValueError("The word can not start with numbers!")
        return value

# Parser that validates completions against the Suggestions model.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [27]:
from langchain.prompts import PromptTemplate

# Prompt text; the leading and trailing newlines are part of the template.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context.
{format_instructions}
target_word={target_word}
context={context}
"""

# {format_instructions} is pre-filled from the parser; the other two variables
# are supplied when the prompt is rendered.
prompt = PromptTemplate(
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=template,
)

model_input = prompt.format_prompt(
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.",
    target_word="behaviour",
)

In [28]:
from langchain_openai import OpenAI

# Before executing the following code, make sure to have
# your OpenAI key saved in the "OPENAI_API_KEY" environment variable.
model = OpenAI(model_name='gpt-3.5-turbo-instruct', temperature=0.0)

# Send the rendered prompt text to the model and keep the raw completion.
output = model.invoke(model_input.to_string())

# Parse the completion into a Suggestions instance (displayed as the cell result).
parser.parse(output)

Suggestions(words=['one', 'two', 'three', 'four', 'five', 'six', 'eight', 'nine', 'ten'])

## Here's another example, but with a compound-typed field.

In [29]:

# Data structure combining a single string field with a list-of-strings field.
class Actor(BaseModel):
    # The actor's name as one string.
    name: str = Field(description="name of an actor")
    # One string per film title.
    film_names: List[str] = Field(description="list of names of films they starred in")

In [31]:
# Rebind `parser` to the Actor model; the following cells use this parser.
parser = PydanticOutputParser(pydantic_object=Actor)

In [32]:
# {format_instructions} is pre-filled from the Actor parser; {query} is
# supplied when the prompt is rendered.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [39]:
# The user query, then the prompt rendered with it.
actor_query = "Generate the filmography for Del Toro."
_input = prompt.format_prompt(query=actor_query)

In [40]:
# Send the rendered prompt text to the model and keep the raw completion.
output = model.invoke(_input.to_string())

In [41]:
# Parse the completion into an Actor instance (displayed as the cell result).
parser.parse(output)

Actor(name='Del Toro', film_names=["Pan's Labyrinth", 'The Shape of Water', 'Hellboy', 'Sicario', 'Blade II'])

## My Example

In [None]:
# Define your desired data structure.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, value):
        """Reject the list when any word begins with a numeric character.

        The validator receives the whole list, so every entry is inspected;
        checking only ``value[0]`` would look at the first word alone and fail
        with IndexError on an empty list.

        Args:
            value: the candidate list of words.

        Returns:
            The unchanged list when no word starts with a number.

        Raises:
            ValueError: if at least one word starts with a numeric character.
        """
        # item[:1] is '' for an empty string, so blank entries do not raise.
        if any(item[:1].isnumeric() for item in value):
            raise ValueError("The word can not start with numbers!")
        return value

In [None]:
# Parser that validates completions against the Suggestions model.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [None]:
# Prompt text with three placeholders: format_instructions, target_word, context.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context.
{format_instructions}
target_word={target_word}
context={context}
"""

In [None]:
# {format_instructions} is pre-filled from the parser; target_word and context
# are supplied when the prompt is rendered.
prompt = PromptTemplate(
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=template,
)

_input = prompt.format_prompt(
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.",
    target_word="behaviour",
)

In [None]:
# Use .invoke(), as the other sections of this notebook do; calling the LLM
# object directly is deprecated in LangChain.
output = model.invoke(_input.to_string())

In [None]:
# Parse the completion into a Suggestions instance (displayed as the cell result).
parser.parse(output)

Suggestions(words=['conduct', 'manner', 'action', 'demeanor', 'attitude', 'activity'])

## My Example, Multiple Outputs

In [78]:
# Define your desired data structure.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, value):
        """Reject the list when any word begins with a numeric character.

        Args:
            value: the candidate list of words.

        Returns:
            The unchanged list when no word starts with a number.

        Raises:
            ValueError: if at least one word starts with a numeric character.
        """
        # item[:1] is '' for an empty string, so blank entries do not raise
        # IndexError the way item[0] would.
        if any(item[:1].isnumeric() for item in value):
            raise ValueError("The word can not start with numbers!")
        return value

    @validator('reasons')
    def end_with_dot(cls, value):
        """Normalise the reasons so that each one ends with a full stop.

        Args:
            value: the candidate list of reasons.

        Returns:
            A list in which '.' has been appended to every reason that did not
            already end with one.
        """
        # endswith() copes with empty strings; item[-1] would raise IndexError.
        return [item if item.endswith('.') else item + '.' for item in value]

In [79]:
# Parser that validates completions against the two-field Suggestions model.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [80]:
# Same placeholders as before; the instruction now also asks for a reason per word.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context. Give a reason for each word suggested
{format_instructions}
target_word={target_word}
context={context}
"""

In [81]:
# {format_instructions} is pre-filled from the parser; target_word and context
# are supplied when the prompt is rendered.
prompt = PromptTemplate(
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=template,
)

model_input = prompt.format_prompt(
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.",
    target_word="behaviour",
)

In [82]:
# Show the rendered prompt text, including the injected format instructions.
print(model_input.to_string())


Offer a list of suggestions to substitue the specified target_word based the presented context. Give a reason for each word suggested
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"words": {"title": "Words", "description": "list of substitue words based on context", "type": "array", "items": {"type": "string"}}, "reasons": {"title": "Reasons", "description": "the reasoning of why this word fits the context", "type": "array", "items": {"type": "string"}}}, "required": ["words", "reasons"]}
```
target_word=behaviour
context=The behaviour of the students in the classroom was

In [83]:
# Use .invoke(), as the other sections of this notebook do; calling the LLM
# object directly is deprecated in LangChain.
output = model.invoke(model_input.to_string())

In [84]:
# Parse the completion into a Suggestions instance. Nothing in this version of
# the model requires `words` and `reasons` to have the same length.
parser.parse(output)

Suggestions(words=['conduct', 'manage', 'handle', 'oversee'], reasons=['These words all suggest a sense of control and direction, which is the opposite of disruptive behaviour in a classroom setting.'])

# CommaSeparatedListOutputParser

## Imports

In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser

## My example

In [None]:
# Parser created without a schema argument; the recorded result of parse() in
# this section is a plain list of strings.
parser = CommaSeparatedListOutputParser()

In [None]:
# Prompt text; the format instructions go on the line after the request.
template = """
Offer a list of suggestions to substitue the word '{target_word}' based the presented the following text: {context}.
{format_instructions}
"""

In [None]:
# {format_instructions} is pre-filled from the parser; target_word and context
# are supplied when the prompt is rendered.
prompt = PromptTemplate(
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=template,
)

# format() is used here (not format_prompt()); the result is passed to the
# model as-is in a later cell.
model_input = prompt.format(
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.",
    target_word="behaviour",
)

In [None]:
# 'text-davinci-003' has been retired by OpenAI; use the same completion model
# as the other sections of this notebook.
model_name = 'gpt-3.5-turbo-instruct'
temperature = 0.0

model = OpenAI(model_name=model_name, temperature=temperature)

In [None]:
# Use .invoke(), as the other sections of this notebook do; calling the LLM
# object directly is deprecated in LangChain. `model_input` is passed as-is.
output = model.invoke(model_input)

In [None]:
# Parse the completion with the comma-separated-list parser (displayed as the cell result).
parser.parse(output)

['Conduct',
 'Actions',
 'Demeanor',
 'Mannerisms',
 'Attitude',
 'Performance',
 'Reactions',
 'Interactions',
 'Habits',
 'Repertoire',
 'Disposition',
 'Bearing',
 'Posture',
 'Deportment',
 'Comportment']

# OutputFixingParser

In [65]:
from langchain.llms import OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser

In [66]:
from pydantic import BaseModel, Field
from typing import List

In [68]:
# Re-create the completion model for this section, using the OpenAI class
# imported in this section's import cell.
model_name = 'gpt-3.5-turbo-instruct'
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)

In [69]:
# Define your desired data structure.
# This version has no validators: both fields are plain lists of strings.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

# Parser for the model above; the cells below feed it malformed completions.
parser = PydanticOutputParser(pydantic_object=Suggestions)

## Example can fix

In [70]:
# Malformed on purpose: the second key is "reasoning", but the model's field is named "reasons".
missformatted_output = '{"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}'

In [71]:
# Expected to fail: the plain parser raises OutputParserException because "reasons" is missing.
parser.parse(missformatted_output)

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [72]:
# Wrap the plain parser in an OutputFixingParser built with the same LLM.
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)

In [73]:
# Same malformed completion, now through the fixing parser; in the recorded
# result the list appears under `reasons`.
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['refers to the way someone acts in a particular situation.', 'refers to the way someone behaves in a particular situation.'])

## Example can NOT fix

In [74]:
# Malformed on purpose: the "reasons" key is absent altogether.
missformatted_output = '{"words": ["conduct", "manner"]}'

In [75]:
# Expected to fail: the plain parser raises OutputParserException because "reasons" is missing.
parser.parse(missformatted_output)

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [76]:
# Built the same way as in the previous example.
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)

In [77]:
# The input carries no reasons at all, so whatever ends up in `reasons` is
# presumably generated by the LLM behind the fixing parser, not recovered
# from the input.
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['These words both describe a way of behaving or carrying oneself.'])

# RetryOutputParser

In [193]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.pydantic import OutputParserException
from langchain.output_parsers import RetryWithErrorOutputParser

In [86]:
from pydantic import BaseModel, Field, validator
from typing import List

In [87]:
# Re-create the completion model for this section, using the OpenAI class
# imported in this section's import cell.
model_name = 'gpt-3.5-turbo-instruct'
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)

In [184]:
# Define your desired data structure.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    @validator('words')
    def not_start_with_number(cls, value):
        """Reject the list when any word begins with a numeric character.

        Args:
            value: the candidate list of words.

        Returns:
            The unchanged list when no word starts with a number.

        Raises:
            ValueError: if at least one word starts with a numeric character.
        """
        # item[:1] is '' for an empty string, so blank entries do not raise
        # IndexError the way item[0] would.
        if any(item[:1].isnumeric() for item in value):
            raise ValueError("The word can not start with numbers!")
        return value

    @validator('reasons')
    def end_with_dot(cls, value):
        """Normalise the reasons so that each one ends with a full stop.

        Args:
            value: the candidate list of reasons.

        Returns:
            A list in which '.' has been appended to every reason that did not
            already end with one.
        """
        # endswith() copes with empty strings; item[-1] would raise IndexError.
        return [item if item.endswith('.') else item + '.' for item in value]

    # Validate that len(words) is equal to len(reasons)
    @validator('reasons')
    def validate_lengths(cls, value, values):
        """Require one reason per suggested word.

        Args:
            value: the list of reasons.
            values: the fields validated before this one, as supplied by pydantic.

        Returns:
            The unchanged list of reasons.

        Raises:
            ValueError: if `words` is available and its length differs from
                the number of reasons.
        """
        # The comparison is skipped when 'words' is not among the
        # already-validated fields.
        if 'words' in values and len(value) != len(values['words']):
            raise ValueError("The words and reasons must have the same length!")
        return value

# Parser that validates completions against the Suggestions model.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [185]:
# Prompt text; it asks for a second list with one reason per suggested word.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context. Offer another list whith the reasons for each word in the first list. The number of reasons must be equal to the number of words
{format_instructions}
target_word={target_word}
context={context}
"""

# {format_instructions} is pre-filled from the parser; target_word and context
# are supplied when the prompt is rendered.
prompt = PromptTemplate(
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=template,
)

model_input = prompt.format_prompt(
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.",
    target_word="behaviour",
)

In [189]:
# Hard-coded completion that lacks the "reasons" key; used by the retry cell below.
missformatted_output = '{"words": ["conduct", "manner"]}'
# Actual completion from the model; the prompt value is passed to invoke() directly.
output = model.invoke(model_input)


In [195]:
# Try the plain parser first; if it raises OutputParserException, build a
# RetryWithErrorOutputParser and retry with the original prompt.
try:
    ans = parser.parse(output)
except OutputParserException:
    # The message is Spanish for "retrying parser with prompt".
    print('reintentando parser con prompt')
    retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=model)
    # NOTE(review): the retry is given the hard-coded `missformatted_output`,
    # not the `output` that just failed to parse — confirm this is intended
    # for the demonstration.
    ans = retry_parser.parse_with_prompt(missformatted_output, model_input)

reintentando parser con prompt


In [196]:
# Display the result, whether it came from the plain parser or from the retry.
ans

Suggestions(words=['conduct', 'manner'], reasons=["These words both refer to the way in which something is done or carried out, which is similar to the meaning of 'behaviour'.", 'Both of these words also have a more formal tone, which is appropriate for the context of a classroom setting.'])