### Output Parser

In [7]:
from openai import OpenAI
import os
from dotenv import load_dotenv, find_dotenv
from langchain.llms import OpenAI as OpenAILC
from langchain.chat_models import ChatOpenAI
from langchain.prompts import SystemMessagePromptTemplate, AIMessagePromptTemplate, ChatPromptTemplate,  HumanMessagePromptTemplate

In [8]:
# Load variables from a local .env file (if one is found) so OPENAI_API_KEY is
# visible to os.getenv below. load_dotenv/find_dotenv were imported but never
# called, so a key stored only in .env was silently ignored.
load_dotenv(find_dotenv())
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [9]:
# Chat model shared by all parser examples below; it reuses the API key
# already held by `client`.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    openai_api_key=client.api_key,
)

### CSV Parser

In [10]:
from langchain.output_parsers import CommaSeparatedListOutputParser

In [11]:
# Parser that turns a comma-separated reply string into a Python list.
csv_parser = CommaSeparatedListOutputParser()

In [12]:
# Show the instruction text that is later appended to the prompt so the model
# answers in a format this parser can read.
csv_parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [13]:
# Human message with two slots: the task itself, then the parser's
# formatting rules on the following line.
human_template = "{instruction}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(template=human_template)

In [14]:
# Chat prompt consisting of the single human message template defined above.
chat = ChatPromptTemplate.from_messages([human_prompt])

In [15]:
# Fill both template slots, then convert the resulting prompt value into the
# list of messages that the chat model takes as input.
prompt_value = chat.format_prompt(
    instruction="List the 5 cities with the hightest populations",
    format_instructions=csv_parser.get_format_instructions(),
)
prompt = prompt_value.to_messages()
prompt

[HumanMessage(content='List the 5 cities with the hightest populations\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')]

In [20]:
# Send the messages to the chat model. `invoke` is the supported entry point;
# calling the model object directly (`llm(prompt)`) is deprecated.
reply = llm.invoke(prompt)
# Raw text of the answer, before any parsing.
reply.content

'Tokyo, Shanghai, Karachi, Beijing, Delhi'

In [21]:
# Split the comma-separated reply text into a list of individual items.
csv_parser.parse(reply.content)

['Tokyo', 'Shanghai', 'Karachi', 'Beijing', 'Delhi']

In [None]:
# For comparison with the parser's output: calling list() on the raw reply
# string yields a list of single characters, which is not very helpful.
list(reply.content)

### Datetime Parser

In [16]:
from langchain.output_parsers import DatetimeOutputParser

In [17]:
# Parser that converts a datetime string in the reply into a datetime.datetime object.
datetime_parser = DatetimeOutputParser()

In [18]:
# Show the instructions (pattern plus example timestamps) that will be sent to the model.
datetime_parser.get_format_instructions()

"Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.\n\nExamples: 1606-06-03T00:45:55.332436Z, 830-06-15T10:15:37.188538Z, 1230-07-14T04:53:24.192581Z\n\nReturn ONLY this string, no other words!"

In [19]:
# System message that steers the model toward datetime-formatted answers.
system_prompt = SystemMessagePromptTemplate.from_template(
    "You always give responses with datetime formats."
)

# Same two-slot human template as in the CSV example: task, then format rules.
human_template = "{instruction}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [20]:
# Chat prompt with the system message first, followed by the human message.
chat = ChatPromptTemplate.from_messages([system_prompt, human_prompt])

In [21]:
# Fill in the task and the datetime parser's formatting rules, then flatten
# the prompt value into the message list passed to the chat model.
prompt_value = chat.format_prompt(
    instruction="The data for when the Netscape browser was launched.",
    format_instructions=datetime_parser.get_format_instructions(),
)
prompt = prompt_value.to_messages()
prompt

[SystemMessage(content='You always give responses with datetime formats.'),
 HumanMessage(content="The data for when the Netscape browser was launched.\nWrite a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.\n\nExamples: 479-10-10T18:39:02.462496Z, 2019-06-06T12:33:05.114250Z, 764-10-17T16:13:07.885448Z\n\nReturn ONLY this string, no other words!")]

In [35]:
# Ask the chat model for the date. `invoke` replaces the deprecated direct
# call `llm(prompt)`.
reply = llm.invoke(prompt)
# Raw timestamp string returned by the model.
reply.content

'1994-12-15T00:00:00.000000Z'

In [36]:
# Convert the model's reply string into a datetime.datetime object.
datetime_parser.parse(reply.content)

datetime.datetime(1994, 12, 15, 0, 0)

In [None]:
# By comparison, parse the same string with the standard-library datetime module.
import datetime
datetime.datetime.strptime(reply.content, "%Y-%m-%dT%H:%M:%S.%fZ")
# You can achieve the same result yourself if you know the format codes in advance.

### Pydantic Parser

This parser allows people to define their own output formats.

In [1]:
from langchain.output_parsers import PydanticOutputParser

In [2]:
from pydantic import BaseModel, Field

In [3]:
# Schema for the structured reply. The Field descriptions below appear in the
# JSON schema that the parser's format instructions show to the model.
class Playlist(BaseModel):
    # Title of the playlist.
    name: str = Field(description="Name of the playlist")
    # Songs in the playlist; the element type is left unconstrained.
    songs: list = Field(description="A list of songs in the playlist")

In [4]:
# Parser built around the Playlist model; parse() returns a Playlist instance.
parser = PydanticOutputParser(pydantic_object=Playlist)

In [5]:
# print() renders the embedded newlines, so the multi-line instructions and
# the JSON schema are readable.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "Name of the playlist", "title": "Name", "type": "string"}, "songs": {"description": "A list of songs in the playlist", "items": {}, "title": "Songs", "type": "array"}}, "required": ["name", "songs"]}
```


In [23]:
# One human message: the task, then the Pydantic parser's formatting rules.
human_text = "{instruction}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(template=human_text)

# Chat prompt made of that single message.
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [24]:
# Fill in the playlist request together with the JSON-schema instructions,
# then turn the prompt value into a list of chat messages.
prompt_value = chat_prompt.format_prompt(
    instruction="Create a playlist for disco songs of 1980s",
    format_instructions=parser.get_format_instructions(),
)
prompt = prompt_value.to_messages()

In [25]:
# Query the chat model. `invoke` replaces the deprecated direct call
# `llm(prompt)`.
reply = llm.invoke(prompt)
# Raw JSON text returned by the model.
print(reply.content)
# The same content parsed into a Playlist instance.
print(parser.parse(reply.content))

{
	"name": "Disco Hits of the 1980s",
	"songs": [
		"Le Freak - Chic",
		"Stayin' Alive - Bee Gees",
		"Super Freak - Rick James",
		"Funky Town - Lipps Inc.",
		"Upside Down - Diana Ross",
		"Boogie Wonderland - Earth, Wind & Fire",
		"Can't Stop the Music - Village People",
		"Got to Be Real - Cheryl Lynn",
		"Last Dance - Donna Summer",
		"You Should Be Dancing - Bee Gees"
	]
}
name='Disco Hits of the 1980s' songs=['Le Freak - Chic', "Stayin' Alive - Bee Gees", 'Super Freak - Rick James', 'Funky Town - Lipps Inc.', 'Upside Down - Diana Ross', 'Boogie Wonderland - Earth, Wind & Fire', "Can't Stop the Music - Village People", 'Got to Be Real - Cheryl Lynn', 'Last Dance - Donna Summer', 'You Should Be Dancing - Bee Gees']


In [26]:
# Inspect the full AIMessage, including response metadata such as token usage.
reply

AIMessage(content='{\n\t"name": "Disco Hits of the 1980s",\n\t"songs": [\n\t\t"Le Freak - Chic",\n\t\t"Stayin\' Alive - Bee Gees",\n\t\t"Super Freak - Rick James",\n\t\t"Funky Town - Lipps Inc.",\n\t\t"Upside Down - Diana Ross",\n\t\t"Boogie Wonderland - Earth, Wind & Fire",\n\t\t"Can\'t Stop the Music - Village People",\n\t\t"Got to Be Real - Cheryl Lynn",\n\t\t"Last Dance - Donna Summer",\n\t\t"You Should Be Dancing - Bee Gees"\n\t]\n}', response_metadata={'token_usage': {'completion_tokens': 128, 'prompt_tokens': 205, 'total_tokens': 333}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d65d4260-658a-4f97-ba18-b9f3b0c60788-0')