# Pydantic（JSON）解析器实战

## 1. LLM

In [2]:
from langchain_openai import AzureChatOpenAI

# Name passed to the client as `model_name`.
deployment = "gpt-4o"

# Chat model reused by every later cell. No endpoint or API key is passed
# here, so they presumably come from the environment -- TODO confirm.
llm = AzureChatOpenAI(model_name=deployment, temperature=0.8, max_tokens=1024)

## 2. DataFrame 用于存储结果

In [3]:
import pandas as pd
from pydantic import BaseModel, Field

# Empty results table with one column per field of the generated copy.
result_columns = ["flower_type", "price", "description", "reason"]
df = pd.DataFrame(columns=result_columns)

# Flowers to write copy for and their prices; the two lists are paired by
# position when they are zipped together later.
flowers = ["玫瑰", "百合", "康乃馨"]
prices = ["50", "30", "20"]


# Schema for one generated piece of flower copy. The output parser derives the
# JSON schema shown to the model from these fields, and each Field description
# is embedded in that schema, so the description strings are part of the prompt.
# NOTE(review): a class docstring would likely be emitted as the schema's
# top-level description and change the prompt -- confirm before adding one.
class FlowerDescription(BaseModel):
    # Kind of flower.
    flower_type: str = Field(description="鲜花的种类")
    # Price of the flower; typed int, so the schema requests an integer.
    price: int = Field(description="鲜花的价格")
    # The marketing copy written for the flower.
    description: str = Field(description="鲜花的描述文案")
    # The model's explanation of why the copy was written this way.
    reason: str = Field(description="为什么要这样写这个文案")

## 3. PydanticOutputParser

In [4]:
from langchain.output_parsers import PydanticOutputParser

# Parser that turns the model's JSON reply into a FlowerDescription instance.
output_paser = PydanticOutputParser(pydantic_object=FlowerDescription)

# Formatting instructions (including the JSON schema) that are later injected
# into the prompt through its {format_instructions} placeholder.
format_instructions = output_paser.get_format_instructions()
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"flower_type": {"description": "\u9c9c\u82b1\u7684\u79cd\u7c7b", "title": "Flower Type", "type": "string"}, "price": {"description": "\u9c9c\u82b1\u7684\u4ef7\u683c", "title": "Price", "type": "integer"}, "description": {"description": "\u9c9c\u82b1\u7684\u63cf\u8ff0\u6587\u6848", "title": "Description", "type": "string"}, "reason": {"description": "\u4e3a\u4ec0\u4e48\u8981\u8fd9\u6837\u5199\u8fd9\u4e2a\u6587\u6848", "title": "Reason", "type": "string"}}, "required": ["flower_type", "price", "description", "reason"]}
```


## 4. Prompt

In [5]:
from langchain.prompts import PromptTemplate

# Template text: {price} and {flower} are supplied per call, while
# {format_instructions} is bound once below.
prompt_template = """您是一位专业的鲜花店文案撰写员。
对于售价为 {price} 元的 {flower} ，您能提供一个吸引人的简短中文描述吗？
{format_instructions}"""

# Pre-fill the parser's instructions so callers only pass price and flower.
prompt = PromptTemplate.from_template(
    prompt_template,
    partial_variables={"format_instructions": format_instructions},
)
print(prompt)

input_variables=['flower', 'price'] partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"flower_type": {"description": "\\u9c9c\\u82b1\\u7684\\u79cd\\u7c7b", "title": "Flower Type", "type": "string"}, "price": {"description": "\\u9c9c\\u82b1\\u7684\\u4ef7\\u683c", "title": "Price", "type": "integer"}, "description": {"description": "\\u9c9c\\u82b1\\u7684\\u63cf\\u8ff0\\u6587\\u6848", "title": "Description", "type": "string"}, "reason": {"description": "\\u4e3a\\u4ec0\\u4e48\\u8981\\u8fd9\\u6837\\u5199\\u8fd9\\u4e2a\\u6587\\u6848"

## 5. Response

In [6]:
# Fill in the template for a single flower, then send it to the model.
rose_prompt = prompt.format_prompt(price=50, flower="玫瑰")
response = llm.invoke(rose_prompt)
print(response.content)

```json
{
  "flower_type": "玫瑰",
  "price": 50,
  "description": "浪漫情怀，尽在这支绽放的玫瑰。为爱的人送上一份特别的惊喜，只需 50 元，点亮她/他的心。",
  "reason": "通过强调玫瑰的浪漫象征以及亲民的价格，吸引顾客购买，送给他们的爱人或特别的人。"
}
```


In [8]:
# Validate the model's JSON reply against FlowerDescription, then convert the
# resulting model instance into a plain dict.
parsed_response = output_paser.parse(response.content)
print(type(parsed_response), parsed_response)
# model_dump() is the Pydantic v2 replacement for the deprecated .dict().
parsed_response_dict = parsed_response.model_dump()
print(type(parsed_response_dict), parsed_response_dict)

<class '__main__.FlowerDescription'> flower_type='玫瑰' price=50 description='浪漫情怀，尽在这支绽放的玫瑰。为爱的人送上一份特别的惊喜，只需 50 元，点亮她/他的心。' reason='通过强调玫瑰的浪漫象征以及亲民的价格，吸引顾客购买，送给他们的爱人或特别的人。'
<class 'dict'> {'flower_type': '玫瑰', 'price': 50, 'description': '浪漫情怀，尽在这支绽放的玫瑰。为爱的人送上一份特别的惊喜，只需 50 元，点亮她/他的心。', 'reason': '通过强调玫瑰的浪漫象征以及亲民的价格，吸引顾客购买，送给他们的爱人或特别的人。'}


In [9]:
# Generate and parse copy for every (flower, price) pair. Records are gathered
# in a list and the table is built in one step, so re-running this cell does
# not accumulate duplicate rows in `df`.
records = []
for flower, price in zip(flowers, prices):
    response = llm.invoke(prompt.format_prompt(flower=flower, price=price))
    parsed_response = output_paser.parse(response.content)
    # model_dump() is the Pydantic v2 replacement for the deprecated .dict().
    records.append(parsed_response.model_dump())

# Reuse the existing column order so the table layout stays the same.
df = pd.DataFrame(records, columns=df.columns)

print("result: ", df.to_dict(orient="records"))

result:  [{'flower_type': '玫瑰', 'price': 50, 'description': '这束优雅的玫瑰花象征着纯洁与爱情。每一朵都经过精心挑选，只为献给最特别的你。', 'reason': '玫瑰花自古以来就是爱情的象征，这个描述突出了其浪漫与优雅，能够吸引希望表达爱意的顾客。'}, {'flower_type': '百合', 'price': 30, 'description': '纯洁高雅的百合花，象征着纯净的爱与美好，30元让你感受到大自然的温柔。', 'reason': '通过突显百合花的象征意义和价值，吸引顾客购买并传达其情感表达的完美选择。'}, {'flower_type': '康乃馨', 'price': 20, 'description': '象征母爱与温馨的康乃馨，只需20元，为您传递无尽感恩与关怀。', 'reason': '以温馨感人的语句突出康乃馨的象征意义，同时强调价格亲民，吸引顾客购买。'}]
