# Parsing Output

Let's set up a Chat Model:

In [83]:
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate,ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv, find_dotenv
import os

# Load variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv(), override=True)

# The API key comes from the environment, never from a literal in the notebook.
api_key = os.environ.get("OPENAI_API_KEY")

# Chat model shared by every example below.
model = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    openai_api_key=api_key,
)

## List Parsing 

In [2]:
from langchain.output_parsers import CommaSeparatedListOutputParser

In [3]:
# Parser that turns a comma-separated reply into a Python list of strings.
output_parser = CommaSeparatedListOutputParser()

In [4]:
# Text that tells the model which output format the parser expects.
format_instructions = output_parser.get_format_instructions()

In [5]:
# Show the instruction text that will be appended to the prompt.
print(format_instructions)

Your response should be a list of comma separated values, eg: `foo, bar, baz`


In [6]:
# Parse a hand-written sample reply to show what the parser returns.
# The sample is passed via `reply` instead of repeating the literal.
reply = "one, two, three"
output_parser.parse(reply)

['one', 'two', 'three']

In [7]:
# Human message template: the user's request followed by the parser's format instructions.
human_template = '{request} {format_instructions}'
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [8]:
# Build a chat prompt that contains only the human message template.
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

# Preview the filled-in prompt; the cell displays the resulting prompt value.
chat_prompt.format_prompt(
    request="give me 5 characteristics of data science professional",
    format_instructions=output_parser.get_format_instructions(),
)

ChatPromptValue(messages=[HumanMessage(content='give me 5 characteristics of data science professional Your response should be a list of comma separated values, eg: `foo, bar, baz`')])

In [9]:
# Fill in the prompt and convert it to the message list that is sent to the chat model.
request = chat_prompt.format_prompt(
    request="give me 5 characteristics of data science professional",
    format_instructions=output_parser.get_format_instructions(),
).to_messages()

In [10]:
# Send the messages to the chat model and keep its reply message.
result = model(request)

In [11]:
# Raw text of the reply, before any parsing.
result.content

'Analytical, curious, problem-solving, detail-oriented, programming skills'

In [12]:
# Convert the raw comma-separated reply into a Python list of strings.
output_parser.parse(result.content)

['Analytical',
 'curious',
 'problem-solving',
 'detail-oriented',
 'programming skills']

## Datetime Parser 

In [24]:
from langchain.output_parsers import DatetimeOutputParser

In [25]:
# Rebinds output_parser: from here on it parses datetime strings instead of lists.
output_parser = DatetimeOutputParser()

In [59]:
# Show the datetime pattern and examples the model will be asked to follow.
print(output_parser.get_format_instructions())

Write a datetime string that matches the 
            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 0530-05-15T14:07:07.300634Z, 1487-05-17T14:11:42.074173Z, 1331-06-05T10:50:14.533165Z


In [60]:
# Human message template: the request on one line, the format instructions on the next.
template_text = "{request}\n{format_instructions}"
human_prompt=HumanMessagePromptTemplate.from_template(template_text)

In [61]:
# Rebinds chat_prompt to a prompt built from the datetime human template.
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [62]:
# Render the prompt as plain text to inspect exactly what the model will receive.
print(
    chat_prompt.format(
        request="Microsoft was founded in ",
        format_instructions=output_parser.get_format_instructions(),
    )
)

Human: Microsoft was founded in 
Write a datetime string that matches the 
            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 0068-01-21T12:01:34.175394Z, 0146-05-30T22:23:02.026834Z, 0450-09-16T17:08:56.280363Z


In [63]:
# Fill in the datetime prompt and convert it to the message list for the chat model.
request = chat_prompt.format_prompt(
    request="Microsoft was founded in",
    format_instructions=output_parser.get_format_instructions(),
).to_messages()

In [64]:
# temperature=0 is passed for this call only - presumably to keep the reply as repeatable as possible.
result = model(request,temperature=0)

In [65]:
# Careful: the reply can contain extra prose around the datetime string,
# and the datetime parser will then fail to parse it.
result.content

'Microsoft was founded on April 4, 1975. The datetime string that matches the given pattern "%Y-%m-%dT%H:%M:%S.%fZ" for Microsoft\'s founding date would be "1975-04-04T00:00:00.000000Z".'

In [66]:
# Same reply displayed again, unchanged, right before the parsing attempt.
result.content

'Microsoft was founded on April 4, 1975. The datetime string that matches the given pattern "%Y-%m-%dT%H:%M:%S.%fZ" for Microsoft\'s founding date would be "1975-04-04T00:00:00.000000Z".'

In [71]:
# The reply wraps the timestamp in extra prose, so the datetime parser rejects it.
# The failure is the point of this cell: catch it and print it so the notebook
# still runs from top to bottom.
from langchain.schema import OutputParserException

try:
    output_parser.parse(result.content)
except OutputParserException as parse_error:
    print(f"OutputParserException: {parse_error}")

OutputParserException: Could not parse datetime string: The Microsoft IDC campus was set up on the following date: 1986-01-01T00:00:00.000000Z

---

# Methods to Fix Parsing Issues

## Auto-Fix Parser

In [67]:
from langchain.output_parsers import OutputFixingParser

# Fresh datetime parser; this is the parser the auto-fixing wrapper below delegates to.
output_parser = DatetimeOutputParser()

# Keep the reply that failed to parse so it can be fed to the fixing parser.
misformatted = result.content

In [68]:
# The text that the plain datetime parser could not handle.
misformatted

'Microsoft was founded on April 4, 1975. The datetime string that matches the given pattern "%Y-%m-%dT%H:%M:%S.%fZ" for Microsoft\'s founding date would be "1975-04-04T00:00:00.000000Z".'

In [69]:
# Wrap the datetime parser together with the chat model in an OutputFixingParser.
new_parser = OutputFixingParser.from_llm(
    llm=model,
    parser=output_parser,
)

In [70]:
# Parse the misformatted reply through the fixing parser; this time a datetime comes back.
new_parser.parse(misformatted)

datetime.datetime(1975, 4, 4, 0, 0)

____
### Fixing via System Prompt:

In [39]:
# System message that constrains the model to answer with a datetime pattern only.
system_prompt = SystemMessagePromptTemplate.from_template("You always reply to questions only in datetime patterns.")
# Human message template: the request on one line, the format instructions on the next.
template_text = "{request}\n{format_instructions}"
human_prompt=HumanMessagePromptTemplate.from_template(template_text)

In [40]:
# Rebinds chat_prompt: the system message comes first, then the human message.
chat_prompt = ChatPromptTemplate.from_messages([system_prompt,human_prompt])

In [41]:
# Render the two-message prompt as plain text to check the system instruction is included.
print(
    chat_prompt.format(
        request="Microsoft was founded in",
        format_instructions=output_parser.get_format_instructions(),
    )
)

System: You always reply to questions only in datetime patterns.
Human: Microsoft was founded in
Write a datetime string that matches the 
            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 1362-03-21T21:28:17.884178Z, 0164-01-22T05:37:16.066620Z, 1477-11-30T14:34:55.687279Z


In [42]:
# Fill in the system + human prompt and convert it to the message list for the chat model.
request = chat_prompt.format_prompt(
    request="Microsoft was founded in",
    format_instructions=output_parser.get_format_instructions(),
).to_messages()

In [51]:
# Stored under a new name so the earlier misformatted `result` stays available.
fixed_result = model(request,temperature=0)

In [52]:
# With the system message in place the reply is just the datetime string.
fixed_result.content

'1975-04-04T00:00:00.000000Z'

In [53]:
# The plain datetime parser now succeeds without the fixing wrapper.
output_parser.parse(fixed_result.content)

datetime.datetime(1975, 4, 4, 0, 0)

## Pydantic JSON Parser
You should also be aware of OpenAI's own JSON offerings (which are still quite new at this time!): https://platform.openai.com/docs/guides/gpt/function-calling


In [71]:
!pip install pydantic



In [72]:
from langchain.output_parsers import PydanticOutputParser

In [73]:
from pydantic import BaseModel, Field

In [74]:
# Target structure for the model's JSON reply.
# NOTE: documented with comments rather than a class docstring, because a
# docstring would become part of the generated schema sent to the model.
class Scientist(BaseModel):
    
    # The Field descriptions below are included in the schema text shown to the model.
    name: str = Field(description="Name of a Scientist")
    # Untyped list: the generated schema places no constraint on the item type.
    discoveries: list = Field(description="Python list of discoveries")

In [75]:
# Question to ask the model for the structured (JSON) example.
query = 'Name a famous scientist and a list of their discoveries' 

In [76]:
# Parser that validates the model's JSON reply into a Scientist instance.
parser = PydanticOutputParser(pydantic_object=Scientist)

In [77]:
# Show the JSON-schema instructions that will be appended to the prompt.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"title": "Name", "description": "Name of a Scientist", "type": "string"}, "discoveries": {"title": "Discoveries", "description": "Python list of discoveries", "type": "array", "items": {}}}, "required": ["name", "discoveries"]}
```


In [79]:
human_prompt = HumanMessagePromptTemplate.from_template("{query}\n{format_instruction}")

In [80]:
chat_propt = ChatPromptTemplate.from_messages([human_prompt])

In [81]:
request = chat_prompt.format_prompt(request ="Tell me about a famous scientist", format_instructions= parser.get_format_instructions()).to_messages()

In [84]:
# Rebinds `result` to the reply for the JSON request; temperature=0 is passed for this call only.
result = model(request, temperature = 0)

In [85]:
# Display the full reply message; its content should be a JSON string.
result

AIMessage(content='{"name": "Albert Einstein", "discoveries": ["Theory of Relativity", "Photoelectric Effect"]}')

In [86]:
# Turn the JSON reply into a Scientist instance.
parser.parse(result.content)

Scientist(name='Albert Einstein', discoveries=['Theory of Relativity', 'Photoelectric Effect'])

In [87]:
# Confirm that the parsed value is an instance of the Scientist class defined above.
type(parser.parse(result.content))

__main__.Scientist