# Output Parser

## CSV Parser

In [71]:
import openai
import os
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate, 
    SystemMessagePromptTemplate, 
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate
)

In [72]:
# SECURITY: never store an API key in the notebook. The key is taken from the
# OPENAI_API_KEY environment variable; if it is not set, it is requested
# interactively so the secret never lands in the saved file.
# Any key that was previously committed here must be treated as compromised
# and revoked.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [73]:
# Chat model shared by the parser examples in this notebook; it authenticates
# with the key held in `openai.api_key`.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    openai_api_key=openai.api_key,
)

In [74]:
from langchain.output_parsers import CommaSeparatedListOutputParser

In [75]:
# Parser used below to turn a comma-separated model reply into a Python list.
csv_parser = CommaSeparatedListOutputParser()

In [76]:
# Display the instruction text that is later inserted into the prompt.
csv_parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz`'

In [77]:
# Human message template: the task goes on the first line and the parser's
# format instructions on the second. Both placeholders are filled in later
# via `format_prompt`.
human_template = "{instruction}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [78]:
# Build the chat prompt from the message-template object (`human_prompt`)
# rather than the raw template string, the same way the later sections of this
# notebook construct their chat prompts.
chat = ChatPromptTemplate.from_messages([human_prompt])

In [79]:
# Fill both template placeholders, then convert the resulting prompt value
# into the list of chat messages that is passed to the model.
csv_prompt_value = chat.format_prompt(
    format_instructions=csv_parser.get_format_instructions(),
    instruction="List the 5 cities with the highest populations",
)
prompt = csv_prompt_value.to_messages()

In [80]:
# Inspect the formatted messages before sending them to the model.
prompt

[HumanMessage(content='List the 5 cities with the highest populations\nYour response should be a list of comma separated values, eg: `foo, bar, baz`')]

In [81]:
# Send the messages to the chat model and display the raw text of its reply.
reply = llm(prompt)
reply.content

'Tokyo, Delhi, Shanghai, São Paulo, Mumbai'

In [82]:
# Convert the raw comma-separated reply text into a list of values.
csv_parser.parse(reply.content)

['Tokyo', 'Delhi', 'Shanghai', 'São Paulo', 'Mumbai']

## Date Time Parser

In [83]:
from langchain.output_parsers import DatetimeOutputParser

In [84]:
# Parser used below to turn a datetime string reply into a `datetime` object.
datetime_parser = DatetimeOutputParser()

In [85]:
# Display the datetime pattern and examples that are later inserted into the prompt.
datetime_parser.get_format_instructions()

'Write a datetime string that matches the \n            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 0404-01-18T11:29:50.513346Z, 1924-10-26T08:36:51.841660Z, 0762-12-24T05:31:10.162449Z'

In [86]:
# Human message template for the datetime example: task first, then the
# parser's format instructions.
# NOTE(review): the second placeholder is spelled `formant_instructions` (sic).
# The keyword argument given to `format_prompt` for this template uses the same
# spelling, so the two must be renamed together if this is ever corrected.
human_template = "{instruction}\n{formant_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [87]:
# System message that restricts the model to bare datetime output so the reply
# can be handed straight to the datetime parser.
# Fixed prompt typo: "while only contain" -> "will only contain".
system_template = "Your responses will only contain dates in datetime formats and no other explanations."
system_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [88]:
# Combine the messages in order: the system message first, then the human
# message carrying the task and the format instructions.
chat = ChatPromptTemplate.from_messages(
    [
        system_prompt,
        human_prompt,
    ]
)

In [89]:
# Ask for a single date and attach the datetime parser's format instructions.
# Fixed prompt typo: "lunched" -> "launched".
# The keyword `formant_instructions` is intentionally left as-is: it has to
# match the placeholder name used in this section's human template.
prompt = chat.format_prompt(
    instruction="What is the date when the Netscape browser was launched?",
    formant_instructions=datetime_parser.get_format_instructions()
).to_messages()

In [90]:
# Send the system + human messages to the chat model and display the raw reply text.
reply = llm(prompt)
reply.content

'1994-12-15T00:00:00.000000Z'

In [91]:
# Convert the reply text into a `datetime` object.
datetime_parser.parse(reply.content)

datetime.datetime(1994, 12, 15, 0, 0)

## Custom Output Parser

In [92]:
!pip install pydantic



In [93]:
from langchain.output_parsers import PydanticOutputParser

In [94]:
from pydantic import BaseModel, Field

In [95]:
# Schema for the structured reply requested from the model. It is handed to
# PydanticOutputParser below, and the Field descriptions show up in the
# generated format instructions.
# NOTE(review): documented with `#` comments on purpose; a class docstring may
# be copied into the generated JSON schema and change the prompt (confirm).
class Playlist(BaseModel):
    # Playlist title.
    name: str = Field(description="Name of the playlist")
    # Untyped list: the generated schema leaves the item type unconstrained, so
    # the model is free to return plain strings or nested objects per song.
    songs: list = Field(description="A list of songs in the playlist")

In [96]:
# Parser that validates the model's JSON reply against the Playlist schema.
parser = PydanticOutputParser(pydantic_object=Playlist)

In [97]:
# Printed (rather than displayed as a repr) so the multi-line instructions are readable.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "Name of the playlist", "title": "Name", "type": "string"}, "songs": {"description": "A list of songs in the playlist", "items": {}, "title": "Songs", "type": "array"}}, "required": ["name", "songs"]}
```


In [98]:
# Human message template for the playlist example: task first, then the
# Pydantic parser's format instructions.
human_template = "{instruction}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [99]:
# Single-message chat prompt built from the human message template.
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [100]:
# Fill the task and the JSON-schema format instructions into the template,
# then convert the prompt value into the message list sent to the model.
playlist_prompt_value = chat_prompt.format_prompt(
    format_instructions=parser.get_format_instructions(),
    instruction="Create a playlist for disco songs.",
)
prompt = playlist_prompt_value.to_messages()

In [101]:
# Send the playlist request to the chat model and display the raw JSON text it returns.
reply = llm(prompt)
reply.content

'{\n  "name": "Disco Songs",\n  "songs": [\n    {\n      "title": "Stayin\' Alive",\n      "artist": "Bee Gees",\n      "duration": "4:45"\n    },\n    {\n      "title": "Le Freak",\n      "artist": "Chic",\n      "duration": "3:36"\n    },\n    {\n      "title": "Dancing Queen",\n      "artist": "ABBA",\n      "duration": "3:51"\n    },\n    {\n      "title": "Y.M.C.A.",\n      "artist": "Village People",\n      "duration": "3:44"\n    },\n    {\n      "title": "Funky Town",\n      "artist": "Lipps Inc.",\n      "duration": "3:59"\n    }\n  ]\n}'

In [102]:
# Parse the JSON reply into a Playlist instance.
parser.parse(reply.content)

Playlist(name='Disco Songs', songs=[{'title': "Stayin' Alive", 'artist': 'Bee Gees', 'duration': '4:45'}, {'title': 'Le Freak', 'artist': 'Chic', 'duration': '3:36'}, {'title': 'Dancing Queen', 'artist': 'ABBA', 'duration': '3:51'}, {'title': 'Y.M.C.A.', 'artist': 'Village People', 'duration': '3:44'}, {'title': 'Funky Town', 'artist': 'Lipps Inc.', 'duration': '3:59'}])