# Pydantic（JSON）解析器实战

In [22]:
from langchain_openai import ChatOpenAI

# Model identifier shared by every ChatOpenAI instance created in this notebook.
# NOTE(review): "gpt-4o-ca" is not a stock OpenAI model id — presumably an alias
# exposed by a proxy endpoint; confirm it resolves in the target environment.
model_name = "gpt-4o-ca"

# Chat model that is invoked with the flower-copy prompts built later in this cell.
llm = ChatOpenAI(model_name=model_name)

import pandas as pd

# Inputs for the generation loop: flower names and their listed prices, paired
# by position. Prices are kept as strings; they are only interpolated into text.
flowers, prices = ["玫瑰", "百合", "康乃馨"], ["50", "30", "20"]

# Result table, created empty; one row is appended per parsed LLM reply.
df = pd.DataFrame(columns="flower_type price description reason".split())

from pydantic import BaseModel, Field

# Schema of one generated flower advertisement. The output parser in this cell is
# built from this model and validates the LLM's JSON reply against its fields.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a class docstring into the JSON schema, which would alter the format
# instructions sent to the LLM.
class FlowerDescription(BaseModel):
    # Kind of flower being advertised.
    flower_type: str = Field(description="鲜花的种类")
    # Price of the flower; declared as an integer in the schema.
    price: int = Field(description="鲜花的价格")
    # The advertising copy itself.
    description: str = Field(description="鲜花的描述文案")
    # The model's explanation of why the copy was written this way.
    reason: str = Field(description="为什么要这样写这个文案")
    
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

# Parser that turns the LLM's JSON reply into a FlowerDescription instance.
output_parser = PydanticOutputParser(pydantic_object=FlowerDescription)

# Schema-derived formatting instructions; they are baked into the prompt below.
format_instructions = output_parser.get_format_instructions()
print("输出格式：", format_instructions)

# Prompt text: role line, the per-flower request, then the format instructions.
prompt_template = (
    "您是一位专业的鲜花店文案撰写员。\n"
    "对于售价为 {price} 元的 {flower} ，您能提供一个吸引人的简短中文描述吗？\n"
    "{format_instructions}"
)

# `format_instructions` is pre-filled, so callers only supply `flower` and `price`.
prompt = PromptTemplate.from_template(
    prompt_template,
    partial_variables={"format_instructions": format_instructions},
)
print("提示：", prompt)

# Generate, parse and record one advertisement per (flower, price) pair.
for flower, price in zip(flowers, prices):
    # Render the complete prompt text. It is named `prompt_text` (not `input`)
    # so the builtin `input()` is not shadowed for the rest of the kernel session.
    prompt_text = prompt.format(flower=flower, price=price)
    print("提示：", prompt_text)

    # Ask the chat model; the reply text is in `output.content`.
    output = llm.invoke(prompt_text)

    # Validate the reply against the FlowerDescription schema.
    parsed_output = output_parser.parse(output.content)

    # Convert to a plain dict once and reuse it. `model_dump()` is the
    # pydantic v2 replacement for the deprecated `.dict()`.
    parsed_output_dict = parsed_output.model_dump()

    # Append as the next row of the result table.
    df.loc[len(df)] = parsed_output_dict

print("输出的数据：", df.to_dict(orient='records'))

输出格式： The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"flower_type": {"description": "\u9c9c\u82b1\u7684\u79cd\u7c7b", "title": "Flower Type", "type": "string"}, "price": {"description": "\u9c9c\u82b1\u7684\u4ef7\u683c", "title": "Price", "type": "integer"}, "description": {"description": "\u9c9c\u82b1\u7684\u63cf\u8ff0\u6587\u6848", "title": "Description", "type": "string"}, "reason": {"description": "\u4e3a\u4ec0\u4e48\u8981\u8fd9\u6837\u5199\u8fd9\u4e2a\u6587\u6848", "title": "Reason", "type": "string"}}, "required": ["flower_type", "price", "description", "reason"]}
```


# 自动修复解析器（OutputFixingParser）实战

In [29]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List


# Target structure for the parser demo in this cell: a flower name plus its colors.
# Documented with comments rather than a class docstring, since pydantic would
# include a docstring in the model's JSON schema.
class Flower(BaseModel):
    # Name of the flower.
    name: str = Field(description="name of a flower")
    # Colors of the flower, one string per color.
    colors: List[str] = Field(description="the colors of this flower")
    
from langchain.output_parsers import OutputFixingParser
from langchain_openai import ChatOpenAI

# Kept from the original cell; no visible code reads this variable.
flower_query = "Generate the charaters for a random flower."

# Python-style single quotes make this string invalid JSON.
misformatted = "{'name': '康乃馨', 'colors': ['粉红色','白色','红色','紫色','黄色']}"

# Strict parser for the Flower schema; the direct call is left disabled.
parser = PydanticOutputParser(pydantic_object=Flower)
# parser.parse(misformatted)

# Wrap the strict parser so that a failed parse is handed to the LLM for repair.
new_parser = OutputFixingParser.from_llm(
    llm=ChatOpenAI(model_name=model_name),
    parser=parser,
)
result = new_parser.parse(misformatted)
print(result)

name='康乃馨' colors=['粉红色', '白色', '红色', '紫色', '黄色']


# 重试解析器（RetryWithErrorOutputParser）实战

In [30]:
# NOTE(review): `template` is not referenced anywhere in the visible notebook —
# the PromptTemplate built later in this cell passes its own inline template
# string. Confirm whether this variable is still needed.
template = """Based on the user question, provide an Action and Action Input for
{format_instructions}
Question: {query}
Response:"""

from pydantic import BaseModel, Field

# Structured reply expected from the LLM: which action to take and its input.
# Neither field has a default, so both are required when parsing.
# Documented with comments rather than a class docstring, since pydantic would
# include a docstring in the model's JSON schema.
class Action(BaseModel):
    # Name of the action to perform.
    action: str = Field(description="action to take")
    # Argument passed to that action.
    action_input: str = Field(description="input to the action")

    
    
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

# Strict parser for the Action schema (both fields are required).
parser = PydanticOutputParser(pydantic_object=Action)

# `format_instructions` is pre-filled from the parser; only `query` is supplied per call.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Concrete prompt for the demo question; reused by the retry parser later on.
prompt_value = prompt.format_prompt(query="What are the colors of Orchid?")

# Deliberately incomplete reply: valid JSON, but `action_input` is missing.
bad_response = '{"action": "search"}'
# parser.parse(bad_response)


from langchain.output_parsers import OutputFixingParser
from langchain_openai import ChatOpenAI

# First attempt: let OutputFixingParser repair the incomplete reply. It is given
# only the bad output (no original question); the recorded result shows a
# placeholder value for `action_input`.
fix_parser = OutputFixingParser.from_llm(
    llm=ChatOpenAI(model_name=model_name),
    parser=parser,
)
parse_result = fix_parser.parse(bad_response)

print('OutputFixingParser的parse结果:', parse_result)

OutputFixingParser的parse结果: action='search' action_input='query'


# RetryWithErrorOutputParser

In [32]:
from langchain.output_parsers import RetryWithErrorOutputParser
from langchain_openai import ChatOpenAI

# Second attempt: the retry parser also receives the original prompt, so the LLM
# can re-answer the question instead of only patching the broken output.
retry_parser = RetryWithErrorOutputParser.from_llm(
    llm=ChatOpenAI(temperature=0, model_name=model_name),
    parser=parser,
)
parse_result = retry_parser.parse_with_prompt(
    completion=bad_response,
    prompt_value=prompt_value,
)
print('RetryWithErrorOutputParser的parse结果:', parse_result)

RetryWithErrorOutputParser的parse结果: action='search' action_input='colors of Orchid'
