<a href="https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Managing_Outputs_with_Output_Parsers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.
llmx 0.0.15a0 requires tiktoken, which is not installed.[0m[31m
[0m

In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key for ChatOpenAI comes from — confirm.
load_dotenv()

True

# PydanticOutputParser

## Imports

In [None]:
from langchain.chat_models import ChatOpenAI

from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

In [None]:
from pydantic import BaseModel, Field, validator
from typing import List

In [None]:
# Shared chat model: every LLMChain and every fixing/retry parser in this notebook uses `model`.
model_name = 'gpt-3.5-turbo'
# NOTE(review): presumably 0.0 is chosen to minimise sampling randomness so parsed outputs are repeatable.
temperature = 0.0
model = ChatOpenAI(model_name=model_name, temperature=temperature)

## Documentation Example

In [None]:
# Define your desired data structure.
# NOTE: described with comments rather than a class docstring on purpose — pydantic
# adds a model's docstring to its JSON schema, which would change the format
# instructions injected into the prompt.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator('setup')
    def question_ends_with_question_mark(cls, field):
        """Reject a setup that does not end with a question mark.

        Args:
            field: The candidate ``setup`` string.

        Returns:
            The unchanged ``setup`` string when it ends with ``?``.

        Raises:
            ValueError: If ``field`` is empty or does not end with ``?``.
        """
        # endswith() also covers the empty string, where indexing field[-1]
        # would raise IndexError instead of a validation error.
        if not field.endswith('?'):
            raise ValueError("Badly formed question!")
        return field

In [None]:
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

In [None]:
# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

In [None]:
# `format_instructions` is filled in once from the parser (a partial variable);
# only `query` has to be supplied when the chain runs.
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [None]:
# Run the LLMChain to get the AI-generated answer
output = chain.run(joke_query)

In [None]:
# Turn the raw completion text into a Joke instance (this is where the model's validators run).
parser.parse(output)

Joke(setup="Why don't skeletons fight each other?", punchline="They don't have the guts.")

In [None]:
# Here's another example, but with a compound typed field.
# `film_names` is the compound (List[str]) field; this model has no custom validators.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")

In [None]:
actor_query = "Generate the filmography for a random actor."

In [None]:
# Rebind `parser` to the Actor schema; the Joke parser created earlier is replaced.
parser = PydanticOutputParser(pydantic_object=Actor)

In [None]:
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [None]:
# Run the LLMChain to get the AI-generated answer
output = chain.run(actor_query)

In [None]:
parser.parse(output)

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'The Green Mile', 'Toy Story'])

## My Example

In [None]:
# Define your desired data structure.
# NOTE: described with comments rather than a class docstring on purpose — pydantic
# adds a model's docstring to its JSON schema, which would change the format
# instructions injected into the prompt.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, field):
        """Reject any suggested word that begins with a numeric character.

        The validator receives the whole ``words`` list, so every item has to
        be inspected individually.

        Args:
            field: The list of suggested words.

        Returns:
            The unchanged list when no word starts with a numeric character.

        Raises:
            ValueError: If any word starts with a numeric character, which is
                what a numbered list such as "1. conduct" looks like.
        """
        for word in field:
            # word[:1] is "" for an empty string, so this cannot raise IndexError.
            if word[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

In [None]:
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [None]:
# Prompt for the suggestion chain: {format_instructions} is supplied by the parser,
# {target_word} and {context} when the chain runs.
# NOTE(review): the wording has typos ("substitue", "based the"); it is left untouched
# because this text is sent to the model verbatim.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context.
{format_instructions}
target_word={target_word}
context={context}
"""

In [None]:
# Inputs shared by the word-suggestion chains in this and the following sections.
target_word="behaviour"
context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson."

In [None]:
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [None]:
# Run the LLMChain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context":context})

In [None]:
parser.parse(output)

Suggestions(words=['conduct', 'attitude', 'demeanor', 'actions', 'manner'])

## My Example, Multiple Outputs

In [None]:
# Define your desired data structure.
# NOTE: described with comments rather than a class docstring on purpose — pydantic
# adds a model's docstring to its JSON schema, which would change the format
# instructions injected into the prompt.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, field):
        """Reject any suggested word that begins with a numeric character.

        Args:
            field: The list of suggested words.

        Returns:
            The unchanged list when no word starts with a numeric character.

        Raises:
            ValueError: If any word starts with a numeric character.
        """
        for word in field:
            # word[:1] is "" for an empty string, so this cannot raise IndexError.
            if word[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @validator('reasons')
    def end_with_dot(cls, field):
        """Make sure every non-empty reason ends with a full stop.

        Args:
            field: The list of reasons.

        Returns:
            A list of the same length in which each non-empty reason ends
            with ".". Empty reasons are passed through unchanged rather than
            being turned into a bare ".".
        """
        return [
            reason if not reason or reason.endswith(".") else reason + "."
            for reason in field
        ]

In [None]:
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [None]:
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

In [None]:
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [None]:
# Run the LLMChain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context":context})

In [None]:
parser.parse(output)

Suggestions(words=['conduct', 'attitude', 'demeanor', 'actions'], reasons=["These words all describe the way the students are behaving in the classroom, which is disruptive and affecting the teacher's ability to teach."])

# CommaSeparatedListOutputParser

## Imports

In [None]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser

## My Example

In [None]:
# Rebind `parser` again: this parser takes no schema class, and its parse()
# result is a plain Python list of strings.
parser = CommaSeparatedListOutputParser()

In [None]:
# Prompt for the comma-separated variant; here the format instructions come last.
# NOTE(review): the wording is garbled ("substitue", "based the presented the following
# text"); it is left untouched because this text is sent to the model verbatim.
template = """
Offer a list of suggestions to substitue the word '{target_word}' based the presented the following text: {context}.
{format_instructions}
"""

In [None]:
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [None]:
# Run the LLMChain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context":context})

In [None]:
parser.parse(output)

['conduct',
 'actions',
 'demeanor',
 'conduct',
 'performance',
 'attitude',
 'manners',
 'actions',
 'deportment',
 'mannerisms']

# OutputFixingParser

In [None]:
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser

In [None]:
from pydantic import BaseModel, Field
from typing import List

In [None]:
# Define your desired data structure.
# This redefinition replaces the earlier Suggestions class and has no custom validators.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

# Strict parser for the schema above; it is also the parser that OutputFixingParser wraps below.
parser = PydanticOutputParser(pydantic_object=Suggestions)

## Example can fix

In [None]:
missformatted_output = '{"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}'

In [None]:
# This parse is expected to fail: the payload uses the key "reasoning" where the
# schema requires "reasons". The error is caught and displayed so that "Run All"
# continues past this cell instead of halting on the exception.
from langchain.schema import OutputParserException

try:
    print(repr(parser.parse(missformatted_output)))
except OutputParserException as exc:
    print(f"{type(exc).__name__}: {exc}")

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [None]:
# Wrap the strict parser together with `model`.
# NOTE(review): presumably the wrapper asks the model to repair a completion that the wrapped parser rejects — confirm against the LangChain docs.
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)

In [None]:
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['refers to the way someone acts in a particular situation.', 'refers to the way someone behaves in a particular situation.'])

## Example can NOT fix

In [None]:
missformatted_output = '{"words": ["conduct", "manner"]}'

In [None]:
# This parse is expected to fail: the payload has no "reasons" key at all.
# The error is caught and displayed so that "Run All" continues past this cell
# instead of halting on the exception.
from langchain.schema import OutputParserException

try:
    print(repr(parser.parse(missformatted_output)))
except OutputParserException as exc:
    print(f"{type(exc).__name__}: {exc}")

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [None]:
# Same construction as in the previous example: `parser` and `model` are unchanged,
# so this simply rebuilds an equivalent wrapper.
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)

In [None]:
# Only the malformed completion is passed here (not the original prompt), and it
# contains no reasons, so any "reasons" in the result cannot be recovered from the
# input and must be produced by the model.
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['These words describe the behavior or way of carrying oneself in a given situation.'])

# RetryWithErrorOutputParser

In [None]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import RetryWithErrorOutputParser

In [None]:
from pydantic import BaseModel, Field, validator
from typing import List

In [None]:
# Define your desired data structure.
# Redefined once more for this section, again without custom validators.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

# Strict parser for the schema above; the retry parser created later wraps it.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [None]:
# Prompt text for this section; {format_instructions} is supplied by the parser.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Render the prompt up front: it is passed to retry_parser.parse_with_prompt() below
# together with the malformed completion. The target word and context are the same
# values used in the earlier sections, repeated inline here.
model_input = prompt_template.format_prompt(target_word="behaviour", context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.")

In [None]:
missformatted_output = '{"words": ["conduct", "manner"]}'

In [None]:
# This parse is expected to fail: the payload has no "reasons" key at all.
# The error is caught and displayed so that "Run All" continues past this cell
# instead of halting on the exception.
from langchain.schema import OutputParserException

try:
    print(repr(parser.parse(missformatted_output)))
except OutputParserException as exc:
    print(f"{type(exc).__name__}: {exc}")

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [None]:
retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=model)

In [None]:
# Unlike the OutputFixingParser.parse() calls earlier, this call also receives the
# original prompt (model_input) alongside the malformed completion.
retry_parser.parse_with_prompt(missformatted_output, model_input)

Suggestions(words=['conduct', 'manner'], reasons=['These words both refer to the way in which something is carried out, which is relevant to the context of the disruptive behavior in the classroom.'])