### Output Parsers

In [1]:
import os
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the OpenAI API key from a local file so the secret is never written
# into the notebook itself.
with open('../openai_api_key.txt','r') as f:
    # Strip surrounding whitespace: text files normally end with a newline,
    # and an unstripped read would make that newline part of the key.
    api_key = f.read().strip()

# Fail here with a clear message instead of with an opaque authentication
# error on the first model call.
if not api_key:
    raise ValueError("'../openai_api_key.txt' is empty; expected an OpenAI API key")

# Presumably the OpenAI clients created below read the key from this variable.
os.environ['OPENAI_API_KEY'] = api_key

llm = OpenAI()
chat = ChatOpenAI()

# Register an in-memory cache through LangChain's global cache hook.
set_llm_cache(InMemoryCache())

#### 1. Comma Separated List Output Parser

In [3]:
# step 1: get parser
# Create the parser once; the same instance is reused below both to produce
# the format instructions and to parse the model's reply.
from langchain.output_parsers import CommaSeparatedListOutputParser
output_parser = CommaSeparatedListOutputParser()
output_parser

CommaSeparatedListOutputParser()

In [4]:
# step 2: get format instructions
# The parser supplies the text that is appended to the prompt so the model
# knows which output shape is expected.
output_parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz`'

In [5]:
# step 3: send instructions to the model
from langchain.prompts.chat import HumanMessagePromptTemplate, ChatPromptTemplate

# One human message: the question first, the parser's formatting rules on the
# following line.
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

# Fill both placeholders; the instructions come straight from the parser.
prompt = chat_prompt.format_prompt(
    human_message="What are the 7 continents?",
    format_instructions=output_parser.get_format_instructions(),
)
prompt

ChatPromptValue(messages=[HumanMessage(content='What are the 7 continents?\nYour response should be a list of comma separated values, eg: `foo, bar, baz`')])

In [6]:
# Send the rendered prompt messages to the chat model and show the raw text
# of its reply.
response = chat(messages=prompt.to_messages())
response.content

'North America, South America, Europe, Asia, Africa, Australia, Antarctica'

In [7]:
# step 4: use parser to parse output
# Turns the model's comma separated reply into a Python list of strings.
output_parser.parse(response.content)

['North America',
 'South America',
 'Europe',
 'Asia',
 'Africa',
 'Australia',
 'Antarctica']

In [8]:
# What if Parser fails?
# Rebind `output_parser` to a datetime parser; from here on the comma separated
# list parser from the previous section is no longer reachable under this name.
from langchain.output_parsers import DatetimeOutputParser
output_parser = DatetimeOutputParser()
# Keep the instructions in a variable so the next cell can put them in the prompt.
format_instructions = output_parser.get_format_instructions()
format_instructions

"Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.\n\nExamples: 1803-06-30T22:51:22.457039Z, 0048-01-22T06:08:05.341079Z, 1633-01-03T21:27:10.518814Z\n\nReturn ONLY this string, no other words!"

In [9]:
# Same single-message layout as before: question, then formatting rules.
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

# A question whose answer does not fit a valid datetime, used to provoke a
# parsing failure further down.
prompt = chat_prompt.format_prompt(
    human_message='When was christ born? ',
    format_instructions=format_instructions,
)

In [10]:
# Ask the chat model and display the raw datetime string it produced.
response = chat(messages=prompt.to_messages())
response.content

'0000-12-25T00:00:00.000000Z'

In [11]:
# The parse failure is the point of this cell, so catch it and display it
# instead of letting the exception stop a "Restart & Run All".
from langchain.schema import OutputParserException

try:
    parsed_datetime = output_parser.parse(response.content)
except OutputParserException as exc:
    # Show the error in the same "Type: message" form a traceback ends with.
    print(f"{type(exc).__name__}: {exc}")
else:
    # The model's answer can differ between runs; if it happens to be a valid
    # datetime, show the parsed value instead.
    print(parsed_datetime)

OutputParserException: Could not parse datetime string: 0000-12-25T00:00:00.000000Z

#### Output Fixing Parser

In [12]:
from langchain.output_parsers import OutputFixingParser

# Wrap the datetime parser together with the chat model; presumably the model
# is asked to repair text that the wrapped parser rejects.
fixing_parser = OutputFixingParser.from_llm(llm=chat, parser=output_parser)

# Feed it the same reply that failed to parse above.
fixed_output = fixing_parser.parse(response.content)
fixed_output

datetime.datetime(1, 1, 1, 0, 0)

In [13]:
# fixing may not work always, trying multiple times
# NOTE(review): an LLM cache was enabled earlier via set_llm_cache; if it also
# covers this chat model, retries may simply replay the same cached answer - confirm.
MAX_FIX_ATTEMPTS = 9   # same number of tries as the original range(1, 10)
fixed_output = None    # reset so a stale value from an earlier cell is never shown as this cell's result
last_error = None
for _ in range(MAX_FIX_ATTEMPTS):
    try:
        fixed_output = fixing_parser.parse(response.content)
    except Exception as exc:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit still stop the loop.
        last_error = exc
    else:
        break
else:
    # Every attempt failed: stay best-effort (no raise) but say so explicitly.
    print(f"All {MAX_FIX_ATTEMPTS} fixing attempts failed; last error: {last_error!r}")
fixed_output

datetime.datetime(1, 1, 1, 0, 0)

### Custom Parsers

#### Structured Output Parser

In [14]:
# define response schema
from langchain.output_parsers import ResponseSchema

# One ResponseSchema per key expected in the model's JSON reply, built from
# (name, description) pairs.
response_schema = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        ('answer', "answer to user's question"),
        ('source', "source used to answer to user's question, should be a website"),
    )
]

In [15]:
# define output parser
from langchain.output_parsers import StructuredOutputParser

# Rebinds `output_parser`: it now refers to a parser built from the two
# response schemas defined above.
output_parser = StructuredOutputParser.from_response_schemas(
    response_schemas=response_schema
)
output_parser

StructuredOutputParser(response_schemas=[ResponseSchema(name='answer', description="answer to user's question", type='string'), ResponseSchema(name='source', description="source used to answer to user's question, should be a website", type='string')])

In [16]:
# format instructions
# Overwrites the datetime instructions stored under the same name earlier;
# the value is reused when the next prompt is built.
format_instructions = output_parser.get_format_instructions()
format_instructions

'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string  // answer to user\'s question\n\t"source": string  // source used to answer to user\'s question, should be a website\n}\n```'

In [17]:
# get response
# Build the single-message prompt: question first, format instructions after.
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

prompt = chat_prompt.format_prompt(
    human_message="What is the largest animal in the world? ",
    format_instructions=format_instructions,
)

# Query the chat model and display the raw reply text.
response = chat(messages=prompt.to_messages())
response.content

'```json\n{\n\t"answer": "Blue Whale",\n\t"source": "https://www.nationalgeographic.com/animals/mammals/b/blue-whale/"\n}\n```'

In [18]:
# Parse the model's reply with the structured parser; the result is keyed by
# the schema names defined above ('answer', 'source').
output_parser.parse(response.content)

{'answer': 'Blue Whale',
 'source': 'https://www.nationalgeographic.com/animals/mammals/b/blue-whale/'}

#### Pydantic Output Parser 

In [19]:
# conventional way of writing classes
class Student:
    """Plain Python class that stores a student's name.

    The ``str`` hint on ``name`` is documentation only: ``__init__`` stores
    whatever value it receives without checking its type.
    """

    def __init__(self, name: str):
        self.name = name


john = Student(name='John')
john.name

'John'

In [20]:
# similarly
# The plain class performs no runtime check, so a non-string is accepted and
# stored unchanged even though `name` is annotated as str.
jane = Student(name = 1) # takes int even after defining name to be str
jane.name

1

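Plain Python classes don't enforce type hints at runtime. Pydantic validates field types when a model is instantiated and, as the example below shows, may coerce a compatible value to the declared type (the int `10` becomes the string `'10'`) rather than keep the wrong type.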

In [21]:
from pydantic import BaseModel

In [28]:
# Pydantic counterpart of the plain Student class: `name` is a declared,
# validated field rather than an attribute set in a hand-written __init__.
class Student1(BaseModel):
    name: str


john = Student1(name='John')
john.name

'John'

In [29]:
# Pass an int where the model declares `name` as str, to see how the Pydantic
# model treats a value of the wrong type.
jane = Student1(name = 10)
jane.name

'10'

In [30]:
from pydantic import BaseModel, Field
from typing import List

# Pydantic model describing the structured answer requested from the LLM.
# Each Field description is passed to Field(...) as the text that explains
# that attribute.
# NOTE(review): documented with comments rather than a class docstring because a
# docstring would presumably be emitted into the model's JSON schema and so
# change the format instructions sent to the model - confirm before adding one.
class Car(BaseModel):
    name : str = Field(description="Name of the car")
    model_number : str = Field(description="model number of the car")
    features : List[str] = Field(description="List of features of the car")
    source: str = Field(description = 'Source of the answer. Should only contain website')

In [31]:
# output parser
# Rebinds `output_parser` once more, this time to a parser built around the
# Car model; its format instructions are displayed for inspection.
from langchain.output_parsers import PydanticOutputParser
output_parser = PydanticOutputParser(pydantic_object=Car)
output_parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"title": "Name", "description": "Name of the car", "type": "string"}, "model_number": {"title": "Model Number", "description": "model number of the car", "type": "string"}, "features": {"title": "Features", "description": "List of features of the car", "type": "array", "items": {"type": "string"}}, "source": {"title": "Source", "description": "Source of the answer. Should only contain website", "type": "string"}}, "required": ["name", "model_number", "features", "source"]}\n```'

In [32]:
# Build the single-message prompt: question first, then the parser's
# formatting rules.
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

prompt = chat_prompt.format_prompt(
    human_message='Tell me about the most expensive car in the world',
    format_instructions=output_parser.get_format_instructions(),
)

# Query the model, then hand its reply text to the Pydantic parser.
response = chat(messages=prompt.to_messages())
output = output_parser.parse(response.content)
output

Car(name='Bugatti La Voiture Noire', model_number='La Voiture Noire', features=['8.0-liter quad-turbo W-16 engine', '1500 horsepower', 'Top speed of 261 mph', 'Only one unit produced', 'Handcrafted bodywork', 'Luxurious interior'], source='https://www.caranddriver.com/news/a27053025/bugatti-la-voiture-noire-most-expensive-car-sold/')