# LangChain Part 3: Output Parsing
- Important when using agents with tools e.g. calculator , search engine 
- Many systems expect data in specific way

In [1]:
from langchain_openai import ChatOpenAI
from langchain_ollama.chat_models import ChatOllama
from os import getenv
from dotenv import load_dotenv
from pprint import pprint

load_dotenv()

# llm = ChatOpenAI(
#     openai_api_key=getenv("OPENROUTER_API_KEY"),
#     openai_api_base="https://openrouter.ai/api/v1",
#     model="meta-llama/llama-3.2-3b-instruct:free",
#     temperature=0.5,

# )
llm = ChatOllama(model='qwen2.5:0.5b', temperature=0.5, max_tokens=512)

result = llm.invoke("Hello, how are you today?")
print(result.content)

Hello! I'm just a virtual AI assistant, so I don't have feelings or emotions like humans do. However, I'm here to help answer any questions you might have. How can I assist you today?


# Parsing Lists

In [None]:
from flask import request
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate

# Initialize an output parser that will parse the response into a comma-separated list
output_parser = CommaSeparatedListOutputParser()

# Get format instructions from the output parser to guide the language model on how to format its response
format_instructions = output_parser.get_format_instructions()
print(format_instructions)  # Print the format instructions

# Define a template for the human message that includes both the request and the format instructions
human_template = "{request} {format_instructions}"

# Create a HumanMessagePromptTemplate from the defined template
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

# Create a ChatPromptTemplate using the human prompt template. This template will be used to structure the chat messages
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

# Format the chat prompt with a specific request and format instructions
formatted_prompt = chat_prompt.format_prompt(
    request="give me 5 characteristics of dogs",
    format_instructions=output_parser.get_format_instructions()
)

# Convert the formatted prompt into a list of messages that can be sent to the language model
request_messages = formatted_prompt.to_messages()

# Invoke the language model (llm) with the formatted request messages and store the result
result = llm.invoke(request_messages)

# Print the content of the response from the language model
print(result.content)

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`
```json
["loyal", "social", "aggressive", "caring", "friendly"]
```


In [8]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

# Initialize parser
output_parser = CommaSeparatedListOutputParser()

# Create template for travel recommendations
human_template = """List 5 popular travel destinations and their best season to visit.
Format each as 'City, Country - Season'.
{format_instructions}"""

# Create prompt templates
human_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

# Format the prompt
formatted_prompt = chat_prompt.format_prompt(
    format_instructions=output_parser.get_format_instructions()
)

# Get messages and invoke LLM
request_messages = formatted_prompt.to_messages()
result = llm.invoke(request_messages)

# Parse and display results
destinations = output_parser.parse(result.content)
print("\nRecommended Destinations:")
for destination in destinations:
    print(f"- {destination}")


Recommended Destinations:
- Barcelona
- Spain - Spring - warm weather with mild temperatures and sunny days.
Copenhagen
- Denmark - Summer - beautiful beaches
- vibrant nightlife and rich culture.
Rome
- Italy - Autumn - golden leaves
- cozy cafes
- and delicious cuisine.
Tokyo
- Japan - Winter - cold winters with snow-covered streets and cherry blossoms blooming in parks.
Sydney
- Australia - Spring - warm weather with warm temperatures and sunny days.


# JSON Output Parsing

In [12]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field


# Define your desired data structure.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


# And a query intented to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser



In [13]:
print(parser.get_format_instructions())


The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"setup": {"description": "question to set up a joke", "title": "Setup", "type": "string"}, "punchline": {"description": "answer to resolve the joke", "title": "Punchline", "type": "string"}}, "required": ["setup", "punchline"]}
```


## Streaming Output

In [15]:
for s in chain.stream({"query": joke_query}):
    print(s)

In [16]:
joke_query = "Tell me a joke."

parser = JsonOutputParser()

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser

chain.invoke({"query": joke_query})

{'joke': "Why couldn't the bicycle stand up by itself? Because it was two-tired!"}

# XML

In [None]:
from langchain_core.output_parsers import XMLOutputParser
from langchain_core.prompts import PromptTemplate

actor_query = "Generate the shortened filmography for Tom Hanks."

output = llm.invoke(
    f"""{actor_query}
Please enclose the movies in <movie></movie> tags"""
)

print(output.content)
parser = XMLOutputParser()

# We will add these instructions to the prompt below
print(parser.get_format_instructions())

Here's a shortened filmography for Tom Hanks:

**Early Career:**

* <movie>He Knows You're Alone</movie> (1980)
* <movie>Splash</movie> (1984)
* <movie>The Money Pit</movie> (1986)
* <movie>Big</movie> (1988)

**Rise to Prominence:**

* <movie>Turner & Hooch</movie> (1989)
* <movie>The 'Burbs</movie> (1989)
* <movie>Bonfire of the Vanities</movie> (1990)
* <movie>A League of Their Own</movie> (1992)
* <movie>Sleepless in Seattle</movie> (1993)
* <movie>Philadelphia</movie> (1993)
* <movie>Forrest Gump</movie> (1994)
* <movie>Apollo 13</movie> (1995)

**Established Star:**

* <movie>Toy Story</movie> (1995)
* <movie>That Thing You Do!</movie> (1996)
* <movie>Saving Private Ryan</movie> (1998)
* <movie>You've Got Mail</movie> (1998)
* <movie>The Green Mile</movie> (1999)
* <movie>Cast Away</movie> (2000)
* <movie>Road to Perdition</movie> (2002)
* <movie>The Terminal</movie> (2004)
* <movie>The Polar Express</movie> (2004)
* <movie>Charlie Wilson's War</movie> (2007)
* <movie>Angels & De

In [None]:
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser

chain.invoke({"query": actor_query})
parser = XMLOutputParser(tags=["movies", "actor", "film", "name", "genre"])

# We will add these instructions to the prompt below
print(parser.get_format_instructions())

{'filmography': [{'actor': [{'name': 'Tom Hanks'},
    {'movies': [{'movie': [{'title': 'Forrest Gump'}, {'year': '1994'}]},
      {'movie': [{'title': 'Cast Away'}, {'year': '2000'}]},
      {'movie': [{'title': 'Apollo 13'}, {'year': '1995'}]},
      {'movie': [{'title': 'Sully'}, {'year': '2016'}]},
      {'movie': [{'title': 'Saving Private Ryan'}, {'year': '1998'}]},
      {'movie': [{'title': 'Big'}, {'year': '1988'}]},
      {'movie': [{'title': 'Philadelphia'}, {'year': '1993'}]},
      {'movie': [{'title': 'Toy Story'}, {'year': '1995'}]},
      {'movie': [{'title': 'Toy Story 2'}, {'year': '1999'}]},
      {'movie': [{'title': 'Toy Story 3'}, {'year': '2010'}]},
      {'movie': [{'title': 'Toy Story 4'}, {'year': '2019'}]},
      {'movie': [{'title': 'The Da Vinci Code'}, {'year': '2006'}]},
      {'movie': [{'title': 'Cloud Atlas'}, {'year': '2012'}]},
      {'movie': [{'title': 'Sully'}, {'year': '2016'}]},
      {'movie': [{'title': 'The Polar Express'}, {'year': '2004'}]}

# CSV

In [13]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser 
from langchain_core.prompts import PromptTemplate

# Define a query to generate CSV output
csv_query = "Generate a CSV list of the top 5 programming languages in 2023."

# Set up a parser
parser = CommaSeparatedListOutputParser()

# Set up a prompt template with the parser instructions
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Create a chain with the prompt, llm, and parser
chain = prompt | llm | parser

# Invoke the chain with the query
result = chain.invoke({"query": csv_query})

print(result)

['JavaScript', 'Python', 'C++', 'Java', 'C#']


# Date And Time Parsing

In [None]:
from langchain.output_parsers import DatetimeOutputParser
from langchain_core.prompts import PromptTemplate

op = DatetimeOutputParser()
print(op.get_format_instructions())
template = """Answer the users question:

{question}

{format_instructions}"""

prompt = PromptTemplate.from_template(
    template,
    partial_variables={"format_instructions": op.get_format_instructions()},
)

pprint(prompt.dict())
chain = prompt | llm | op

r =chain.invoke({"question": "What is the current date and time?"})
print(r)

# Output Fixing

In [10]:
from langchain.output_parsers import OutputFixingParser
from langchain.output_parsers import DatetimeOutputParser
from torch import ne

output_parser = DatetimeOutputParser()

misformatted = result.content
print(misformatted)

new_parser = OutputFixingParser.from_llm(parser=output_parser,llm=llm)

new_parser.parse(misformatted)

I'm just a language model, so I don't have emotions or feelings like humans do, but I'm functioning properly and ready to help you with any questions or tasks you have. How can I assist you today?


datetime.datetime(2024, 11, 20, 14, 30)

# Retry with Error Handling

# Pyndatic Models

In [14]:
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

# Define your desired data structure.
class Filmography(BaseModel):
    early_career: list[str] = Field(description="List of movies in early career")
    rise_to_prominence: list[str] = Field(description="List of movies during rise to prominence")
    established_star: list[str] = Field(description="List of movies as an established star")
    recent_work: list[str] = Field(description="List of recent movies")

# Define a query to generate the filmography
filmography_query = "Generate the shortened filmography for Tom Hanks."

# Set up a parser
parser = JsonOutputParser(pydantic_object=Filmography)

# Set up a prompt template with the parser instructions
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Create a chain with the prompt, llm, and parser
chain = prompt | llm | parser

# Invoke the chain with the query
result = chain.invoke({"query": filmography_query})

print(result)

{'properties': {'early_career': ['Forrest Gump', 'Big', 'The Money Pit'], 'rise_to_prominence': ['Apollo 13', 'Saving Private Ryan', 'Philadelphia'], 'established_star': ['Cast Away', 'The Da Vinci Code', 'Sully'], 'recent_work': ['A Beautiful Day in the Neighborhood', 'News of the World']}, 'required': ['early_career', 'rise_to_prominence', 'established_star', 'recent_work']}


# Yaml

In [15]:
from langchain.output_parsers import YamlOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


# Define your desired data structure.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


# And a query intented to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = YamlOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser

chain.invoke({"query": joke_query})

Joke(setup="Why couldn't the bicycle stand up by itself?", punchline='Because it was two-tired.')

In [43]:
r = parser.get_format_instructions()
print(r)

The output should be formatted as a YAML instance that conforms to the given JSON schema below.

# Examples
## Schema
```
{"title": "Players", "description": "A list of players", "type": "array", "items": {"$ref": "#/definitions/Player"}, "definitions": {"Player": {"title": "Player", "type": "object", "properties": {"name": {"title": "Name", "description": "Player name", "type": "string"}, "avg": {"title": "Avg", "description": "Batting average", "type": "number"}}, "required": ["name", "avg"]}}}
```
## Well formatted instance
```
- name: John Doe
  avg: 0.3
- name: Jane Maxfield
  avg: 1.4
```

## Schema
```
{"properties": {"habit": { "description": "A common daily habit", "type": "string" }, "sustainable_alternative": { "description": "An environmentally friendly alternative to the habit", "type": "string"}}, "required": ["habit", "sustainable_alternative"]}
```
## Well formatted instance
```
habit: Using disposable water bottles for daily hydration.
sustainable_alternative: Switch t

setup="Why couldn't the bicycle stand up by itself?" punchline='Because it was two-tired.'


# Pandas Dataframe

In [None]:
import pandas as pd
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

# Define your desired data structure.
class ProgrammingLanguage(BaseModel):
    name: str = Field(description="Name of the programming language")
    rank: int = Field(description="Rank of the programming language")

# Define a query to generate the list of programming languages
languages_query = "Generate a list of the top 5 programming languages in 2023 with their ranks."

# Set up a parser
parser = JsonOutputParser(pydantic_object=ProgrammingLanguage)

# Set up a prompt template with the parser instructions
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Create a chain with the prompt, llm, and parser
chain = prompt | llm | parser

# Invoke the chain with the query
result = chain.invoke({"query": languages_query})

# Convert the result to a Pandas DataFrame
df = pd.DataFrame(result)
print(df)

OutputParserException: Invalid json output: Here's a Python script that generates a list of the top 5 programming languages in 2023 with their ranks, formatted as a JSON instance that conforms to the provided schema:

```python
import json

# Define the schema
schema = {
    "properties": {
        "name": {
            "description": "Name of the programming language",
            "title": "Name",
            "type": "string"
        },
        "rank": {
            "description": "Rank of the programming language",
            "title": "Rank",
            "type": "integer"
        }
    },
    "required": ["name", "rank"]
}

# Define the list of top 5 programming languages in 2023 with their ranks
top_languages = [
    {"name": "JavaScript", "rank": 1},
    {"name": "Python", "rank": 2},
    {"name": "Java", "rank": 3},
    {"name": "C++", "rank": 4},
    {"name": "C#", "rank": 5}
]

# Validate the list of top languages against the schema
def validate_schema(data):
    try:
        jsonschema.validate(instance=data, schema=schema)
        return True
    except jsonschema.exceptions.ValidationError:
        return False

# Format the list of top languages as a JSON instance
def format_json(data):
    return json.dumps(data, indent=4)

# Validate and format the list of top languages
if validate_schema(top_languages):
    formatted_json = format_json(top_languages)
    print(formatted_json)
else:
    print("Invalid schema")
```

When you run this script, it will output the following JSON instance:

```json
{
    "name": "JavaScript",
    "rank": 1
}
{
    "name": "Python",
    "rank": 2
}
{
    "name": "Java",
    "rank": 3
}
{
    "name": "C++",
    "rank": 4
}
{
    "name": "C#",
    "rank": 5
}
```

This JSON instance conforms to the provided schema, with each language represented as a separate JSON object that contains the `name` and `rank` properties.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE

# Enum

In [None]:
from enum import Enum
from langchain.output_parsers.enum import EnumOutputParser

class Colors(Enum):
    RED = "red"
    GREEN = "green"
    BLUE = "blue"
    

parser = EnumOutputParser(enum=Colors)
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

prompt = PromptTemplate.from_template(
    """What color eyes does this person have?

> Person: {person}

Instructions: {instructions}"""
).partial(instructions=parser.get_format_instructions())
chain = prompt | llm | parser

chain.invoke({"person": "This person has green eyes."})

OutputParserException: Response 'Green' is not one of the expected values: ['red', 'green', 'blue']
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE

# Structured