# Pydantic Output Parsers

In [5]:
# Add OpenAI API key here:
# import os
# os.environ['OPENAI_API_KEY'] = ''

---


In [7]:
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate

`LLMChain` does not parse the LLM output by default, even if the prompt object has an output parser. To have the chain apply a parser to the LLM output, pass it via the `output_parser` argument.


In [8]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List

In [10]:
# Schema the LLM response is parsed into: a single field holding a list of idea strings.
# The Field description is part of the model definition handed to PydanticOutputParser.
class Ideas(BaseModel):
    ideas: List[str] = Field(description="A list of ideas for a story.")

# Parser that converts the raw completion text into an `Ideas` instance.
parser = PydanticOutputParser(pydantic_object=Ideas)

# Chat model client, built with its default settings.
chat = ChatOpenAI()

# Two template variables are expected at run time: {genre} and {format_instructions}.
prompt = ChatPromptTemplate.from_template(
    "I want you to brainstorm ideas for characters for my story. The genre is {genre}.\n  {format_instructions}"
)

# Passing output_parser makes the chain hand back the parsed object instead of raw text.
chain = LLMChain(
    prompt=prompt,
    llm=chat,
    output_parser=parser,
)

In [11]:
# Run the chain with both template variables filled in; the format instructions
# are generated by the parser itself.
result = chain.run(
    {"genre": "fantasy", "format_instructions": parser.get_format_instructions()}
)

In [None]:
# Show the parsed model first, then the same data converted to a plain dict.
print(result)
print(result.dict())

ideas=['A brave warrior with a mysterious past.', 'A mischievous and witty sorcerer with a knack for illusion magic.', 'A young farm girl with a secret ability to communicate with animals.', 'A wise and ancient dragon who has taken human form to guide adventurers.', 'A cursed prince trapped in the body of a beast, seeking redemption.', 'A rogue thief with a heart of gold, always looking for the next big score.', 'A powerful witch wielding dark magic, torn between good and evil.', 'A fearless and skilled archer from a nomadic tribe, chosen by prophecy.', 'A misfit group of unlikely heroes with unique abilities, brought together by fate.', 'A clever and cunning trickster god, playing pranks on mortals for his own amusement.']
{'ideas': ['A brave warrior with a mysterious past.', 'A mischievous and witty sorcerer with a knack for illusion magic.', 'A young farm girl with a secret ability to communicate with animals.', 'A wise and ancient dragon who has taken human form to guide adventurer

------------------------------------

## Using Output Parsers in LangChain Expression Language (LCEL)

In [18]:
# LCEL pipeline composed with `|`: prompt, then a chat model, then the parser.
# NOTE: this rebinds the name `chain` used by the LLMChain cell above.
chain = prompt | ChatOpenAI() | parser

In [19]:
# Invoke the LCEL pipeline with the same two template variables as before.
result = chain.invoke({
    "genre": "fantasy",
    "format_instructions": parser.get_format_instructions()
})

In [21]:
# Display the parsed result as the cell output.
result

Ideas(ideas=['1. A young orphan with the ability to communicate with animals.', '2. A wise and powerful sorcerer who has been banished from his kingdom.', '3. A skilled thief with a mysterious past.', '4. A courageous warrior from a hidden tribe.', '5. A mischievous fairy with the power to grant wishes.', '6. An ancient dragon with the ability to shape-shift into human form.', '7. A talented bard with a magical musical instrument.', '8. A cursed princess trapped in a tower guarded by a fearsome beast.', '9. A cunning and manipulative villain seeking to conquer the world.', '10. A group of unlikely allies brought together by a prophecy.'])

---------------------------------------------

## Validate LLM Responses Based on Business Logic

Using a field validator within Pydantic, you can validate the output of the LLM against business logic. For example, to check that the LLM returned valid email addresses, you can use Pydantic's `EmailStr` type or, as in the cells below, the `validate_email` function.

In [58]:
from pydantic import BaseModel, Field, validator, validate_email
from typing import List
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain.schema import OutputParserException

# Define a prompt template for generating emails with customizable parameters.
# Template variables: {email_type} and {format_instructions}.
prompt = ChatPromptTemplate.from_template(
    "I want to generate a list of 10 emails for my marketing campaign. The emails should be {email_type}.\n  {format_instructions}"
)

# Same request, but asking for only 2 emails. It is used below to demonstrate a
# response that is rejected by the email-count validator.
bad_prompt = ChatPromptTemplate.from_template(
    "I want to generate a list of 2 emails for my marketing campaign. The emails should be {email_type}.\n  {format_instructions}"
)

# Create a Pydantic BaseModel to represent the generated emails
class Emails(BaseModel):
    # Parsed LLM response: a list of email strings that must satisfy the
    # campaign's minimum-count rule enforced by the validator below.
    emails: List[str] = Field(description="A list of generated emails.")

    @validator("emails")
    def validate_email_count(cls, emails):
        """Reject responses that contain fewer than 10 emails.

        Args:
            emails: The list of email strings parsed from the LLM output.

        Returns:
            The unchanged list when it holds at least 10 entries.

        Raises:
            ValueError: If fewer than 10 emails were generated.
        """
        # The prompt requests 10 emails and the error message states a minimum
        # of 10, so the threshold must be 10 (it was 5, letting 5-9 through).
        if len(emails) < 10:
            raise ValueError("At least 10 emails must be generated.")
        return emails

# Initialize a PydanticOutputParser for processing Langchain output
parser = PydanticOutputParser(pydantic_object=Emails)

# Construct the Langchain pipeline with the defined prompt and parser.
# Both chains share the same parser; only the prompt differs.
good_chain = prompt | ChatOpenAI() | parser
bad_chain = bad_prompt | ChatOpenAI() | parser

In [59]:
# Invoke the Langchain pipeline with desired parameters
# Ask for ten business emails; a parsing or validation failure is reported
# by printing the OutputParserException.
try:
    result = good_chain.invoke(
        {
            "email_type": "business",  # Customize email type here
            "format_instructions": parser.get_format_instructions(),
        }
    )
except OutputParserException as parse_error:
    print(parse_error)
else:
    # These are the emails:
    print(result.emails)

    # Run every address through validate_email and print what it returns
    for address in result.emails:
        print(validate_email(address))

['email1@example.com', 'email2@example.com', 'email3@example.com', 'email4@example.com', 'email5@example.com', 'email6@example.com', 'email7@example.com', 'email8@example.com', 'email9@example.com', 'email10@example.com']
('email1', 'email1@example.com')
('email2', 'email2@example.com')
('email3', 'email3@example.com')
('email4', 'email4@example.com')
('email5', 'email5@example.com')
('email6', 'email6@example.com')
('email7', 'email7@example.com')
('email8', 'email8@example.com')
('email9', 'email9@example.com')
('email10', 'email10@example.com')


In [63]:
# Invoke the Langchain pipeline with desired parameters
# Run the two-email prompt; the parsed result is expected to fail validation,
# so the interesting path is the exception handler.
try:
    result = bad_chain.invoke(
        {
            "email_type": "business",  # Customize email type here
            "format_instructions": parser.get_format_instructions(),
        }
    )
except OutputParserException as parse_error:
    # Handle error here: show the raw completion, the observation and the args
    print(parse_error.llm_output)
    print(parse_error.observation)
    print(parse_error.args)
else:
    # These are the emails:
    print(result.emails)

    # Run every address through validate_email and print what it returns
    for address in result.emails:
        print(validate_email(address))

{"emails": ["email1@example.com", "email2@example.com"]}
None
('Failed to parse Emails from completion {"emails": ["email1@example.com", "email2@example.com"]}. Got: 1 validation error for Emails\nemails\n  At least 10 emails must be generated. (type=value_error)',)


---


## Task Decomposition for Creating a Story

Given that your story generation requires multiple sequential prompts, you can use a `SequentialChain` to chain them together. The `SequentialChain` automatically passes the output of each prompt to the following prompts as input.


In [27]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SequentialChain

In [28]:
# Step 1 prompt: brainstorm 3 - 5 characters (name + biography) for the given genre.
# The only template variable is {genre}; the example lines show the expected
# "Name: ..., Biography: ..." response format.
character_generation_prompt = PromptTemplate(
    template="""I want you to brainstorm 3 - 5 characters for my short story. The genre is {genre}.
    Each character must have a Name and a Biography.
    You must provide a name and biography for each character, this is very important!
    ---
    Example response:
    Name: CharWiz, Biography: A wizard who is a master of magic.
    Name: CharWar, Biography: A warrior who is a master of the sword.
    ---

    Characters: """,
    input_variables=["genre"],
)

# Step 2 prompt: build a plot from the characters and the genre.
# Template variables: {characters} and {genre}.
plot_generation_prompt = PromptTemplate(
    template="""Given the following characters and the genre, create an effective plot for a short story:
    Characters:
    {characters}
    ---
    Genre: {genre}
    ---
    Plot: """,
    input_variables=["genre", "characters"],
)

# Step 3 prompt: decompose the plot into a list of scenes.
# Template variables: {characters}, {genre} and {plot}. The example response
# asks for one "Scene N: ..." entry per line.
# NOTE(review): despite the `_plot` suffix this is a prompt template, not a plot.
scene_generation_plot = PromptTemplate(
    template="""Act as an effective content creator.
    Given multiple characters and a plot you are responsible generating the various scenes for each act.

    You must de-compose the plot into multiple effective scenes:

    ---
    Characters:
    {characters}
    ---
    Genre: {genre}
    ---
    Plot: {plot}
    ---
    Example response:
    Scenes:
    Scene 1: Some text here.
    Scene 2: Some text here.
    Scene 3: Some text here.
    ----
    Scenes:
    """,
    input_variables=["genre", "characters", "plot"],
)

In [29]:
# One chat model instance shared by the three story-planning chains.
# The chains previously referenced `chat` from a much earlier cell, leaving
# `chat_model` unused; they now use the model created right here.
chat_model = ChatOpenAI()

# Each chain stores its completion under `output_key`. Those keys match the
# {characters} and {plot} placeholders consumed by the later prompts.
character_generation_chain = LLMChain(
    llm=chat_model, prompt=character_generation_prompt, output_key="characters"
)
plot_generation_chain = LLMChain(
    llm=chat_model, prompt=plot_generation_prompt, output_key="plot"
)
scene_generation_chain = LLMChain(
    llm=chat_model, prompt=scene_generation_plot, output_key="scenes"
)

In [30]:
# Run the three chains in order: characters -> plot -> scenes.
# Only "genre" is supplied by the caller; "characters" and "plot" come from the
# output keys of the earlier chains. All four values are returned in the result.
story_chain = SequentialChain(
    chains=[
        character_generation_chain,
        plot_generation_chain,
        scene_generation_chain,
    ],
    input_variables=["genre"],
    output_variables=["characters", "plot", "scenes", "genre"],
)

In [31]:
# Execute the whole pipeline for the fantasy genre; the result is indexed by
# the chain's output variable names in later cells.
story_result = story_chain({"genre": "fantasy"})

---


## Sequential Story Scene Generation:


In [35]:
from langchain.memory import SimpleMemory

# Split the generated scenes text on newlines and drop the empty lines:
scenes = list(filter(None, story_result["scenes"].split("\n")))

# Prompt that writes the dialogue script for a single scene.
# Template variables: {characters}, {genre}, {plot}, {scene},
# {previous_scene_summary} (used to avoid repetition) and {index}, which
# numbers the "SCENE {index}:" header the model is asked to continue from.
character_script_prompt = PromptTemplate(
    template="""Given the following characters: {characters} and the genre: {genre}, create an effective character script for a scene.

    You must follow the following principles:
    - Use the Previous Scene Summary: {previous_scene_summary} to avoid repeating yourself.
    - Use the Plot: {plot} to create an effective scene character script.
    - Currently you are generating the character dialogue script for the following scene: {scene}

    ---
    Here is an example response:
    SCENE 1: ANNA'S APARTMENT

    (ANNA is sorting through old books when there is a knock at the door. She opens it to reveal JOHN.)
    ANNA: Can I help you, sir?
    JOHN: Perhaps, I think it's me who can help you. I heard you're researching time travel.
    (Anna looks intrigued but also cautious.)
    ANNA: That's right, but how do you know?
    JOHN: You could say... I'm a primary source.

    ---
    SCENE {index}:

    """,
    input_variables=[
        "characters",
        "genre",
        "plot",
        "scene",
        "previous_scene_summary",
        "index",
    ],
)

# Prompt that condenses a generated character script into a scene summary.
summarize_prompt = PromptTemplate(
    template="""Given a character script create a summary of the scene. Character script: {character_script}""",
    input_variables=["character_script"],
)

# The script chain's output key ("character_script") is the summarize prompt's
# input variable, so the two chains can be run back to back.
character_script_chain = LLMChain(
    llm=chat, prompt=character_script_prompt, output_key="character_script"
)

summarize_chain = LLMChain(llm=chat, prompt=summarize_prompt, output_key="summary")
# No summary exists before the first scene, so start with an empty string.
previous_scene_summary = ""

# Creating a simple memory to store the plot, characters, and genre:
memory = SimpleMemory(
    memories={
        "plot": story_result["plot"],
        "characters": story_result["characters"],
        "genre": "fantasy",
    }
)

# Collected results: one dict per scene with its script and the scene line.
generated_scenes = []

# The chain is identical for every scene, so it is built once up front instead
# of being re-created on each loop iteration. It first writes the character
# script and then summarizes it; plot, characters and genre come from `memory`.
scene_generation_chain = SequentialChain(
    chains=[character_script_chain, summarize_chain],
    memory=memory,
    input_variables=[
        "scene",
        "previous_scene_summary",
        "index",
    ],
    verbose=True,
    output_variables=["character_script", "summary"],
)

# Just do the first 3 scenes:
for index, scene in enumerate(scenes[0:3]):
    print(f"Generating scene {index + 1}...")

    # Generate the scene script and its summary, given the previous summary:
    llm_result = scene_generation_chain(
        {
            "characters": story_result["characters"],
            "genre": "fantasy",
            "plot": story_result["plot"],
            "scene": scene,
            "previous_scene_summary": previous_scene_summary,
            "index": index + 1,
        }
    )

    # Feed this scene's summary into the next iteration:
    previous_scene_summary = llm_result["summary"]

    # Store the generated scene; `scene` is the same item as scenes[index],
    # so the loop variable is used directly.
    generated_scenes.append(
        {"character_script": llm_result["character_script"], "scene": scene}
    )

Generating scene 1...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
Generating scene 2...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
Generating scene 3...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [36]:
# Display the list of generated scene dicts as the cell output.
generated_scenes

[{'character_script': "SERAPHINA'S VILLAGE - DAY\n\n(The village of Emberhaven is bustling with activity. People go about their daily tasks, casting curious glances at Seraphina who stands in the center of the village square. Seraphina, dressed in flowing robes, holds out her hands and conjures a small flame, much to the amazement of the villagers.)\n\nVILLAGER 1: (whispering) It's her, the sorceress. Do you think she'll burn down the village?\n\nVILLAGER 2: (nervously) I heard she can control fire. What if she loses control?\n\nSERAPHINA: (calmly) My fellow villagers, there is no need to fear. I am Seraphina, and I have dedicated my life to protecting this village. My powers are not meant to harm, but to bring light and warmth to those in need.\n\nVILLAGER 3: (skeptical) How can we trust you? We've heard tales of sorcery gone wrong.\n\nSERAPHINA: I understand your concerns, but I assure you, with the guidance of the wise old witch, I have learned to control my powers. I have only ever

In [37]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
import pandas as pd

In [38]:
# One row per generated scene, with "character_script" and "scene" columns.
df = pd.DataFrame(generated_scenes)

In [39]:
# Merge every scene's script into one newline-separated string.
all_character_script_text = "\n".join(df["character_script"].tolist())

# Splitter configured with a chunk size of 1500 and an overlap of 200.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=200,
    chunk_size=1500,
)

# Wrap the combined script in document objects for the summarize chain.
docs = text_splitter.create_documents(
    [all_character_script_text]
)

In [40]:
# Summarize all script chunks with a "map_reduce" summarization chain and
# print the final summary.
# NOTE: this rebinds the name `chain` used by earlier cells.
chain = load_summarize_chain(llm=chat, chain_type="map_reduce")
summary = chain.run(docs)
print(summary)

Seraphina, along with Alistair and Evangeline, is trusted by the villagers and appointed by the Council to retrieve a powerful artifact and protect their realm. They embark on a dangerous quest to restore balance and prevent misuse of the artifact.
