In [None]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_openai import ChatOpenAI
import os
#os.environ.clear()  #to clear .env cache
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
chat = ChatOpenAI()
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

In [None]:
#step 1 - import parser, make instance
from langchain.output_parsers import CommaSeparatedListOutputParser

output_parser = CommaSeparatedListOutputParser()

In [None]:
#step 2 -format instructions

# code to view what format instructions are
output_parser.get_format_instructions()

reply = "red, blue, green"
output_parser.parse(reply)

In [None]:
human_template = "{request}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [7]:
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])


In [8]:
model_request = chat_prompt.format_prompt(request="give me 5 characteristics of Pikachu", format_instructions=output_parser.get_format_instructions()).to_messages()

In [11]:
result = llm.invoke(model_request)

#parse into a list
output_parser.parse(result.content)

['Electric-type', 'small', 'yellow', 'cute', 'can generate electricity']

In [13]:
#show content
result.content

'Electric-type, small, yellow, cute, can generate electricity'

In [14]:
#SECTION 2

from langchain.output_parsers import DatetimeOutputParser

output_parser = DatetimeOutputParser()
#get formatting info
output_parser.get_format_instructions()

"Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.\n\nExamples: 1148-03-21T16:20:30.479424Z, 1632-06-07T16:22:00.704193Z, 0371-04-15T14:06:07.414087Z\n\nReturn ONLY this string, no other words!"

In [20]:
#create template text
template_text = "{request}\n{format_instructions}"
#create a human prompt
human_prompt = HumanMessagePromptTemplate.from_template(template_text)
#create a system prompt
system_prompt = SystemMessagePromptTemplate.from_template("You always reply to questions only in datetime patterns.")

In [22]:
#create a chat prompt
chat_prompt = ChatPromptTemplate.from_messages([human_prompt, system_prompt])

In [23]:
#format
model_request = chat_prompt.format_prompt(request="What date was the 13th amendment ratified in the US?", format_instructions=output_parser.get_format_instructions()).to_messages()

In [24]:
result = llm.invoke(model_request, temperature=0)

result.content

'1865-12-06T00:00:00.000000Z'

In [25]:
output_parser.parse(result.content)

datetime.datetime(1865, 12, 6, 0, 0)

In [None]:
# *** if results are not formatted in only datetime format, send back to model with instructions to fix it ***

#from langchain.output_parsers import OutputFixingParser
#incorrect_format = result.content

#new_parser = OutputFixingParser.from_llm(parser=output_parser, llm= llm)

#new_parser.parse(incorrect_format)

In [26]:
# SECTION 3
# pip install pydantic

from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

#create a base class
class Scientist(BaseModel):

    name: str = Field(description="Name of Scientist")
    discoveries: list = Field(description="List of discoveries")

In [27]:
parser = PydanticOutputParser(pydantic_object=Scientist)

In [28]:
#check format instructions
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "Name of Scientist", "title": "Name", "type": "string"}, "discoveries": {"description": "List of discoveries", "items": {}, "title": "Discoveries", "type": "array"}}, "required": ["name", "discoveries"]}
```


In [29]:
#prompt templates
human_prompt = HumanMessagePromptTemplate.from_template("{request}\n{format_instructions}")

chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [30]:
#build out request
request =chat_prompt.format_prompt(request="Tell me about a famous scientist",
                                   format_instructions=parser.get_format_instructions()).to_messages()

In [33]:
result = llm.invoke(request, temperature=0)
type(parser.parse(result.content))

__main__.Scientist