# 结构化输出(Structured Output)
有时候我们希望模型输出的不是普通的字符串文本，而是像 JSON 那样的结构化数据，这样可以更快捷高效地交付给下游业务使用。

In [2]:
import os
from dotenv import load_dotenv, find_dotenv
# Find a .env file and load its entries into the process environment.
# The result is bound to `_` so the return value is discarded.
_ = load_dotenv(find_dotenv())

In [3]:
from langchain_community.chat_models import ChatSparkLLM

# iFlytek Spark LLM credentials, read from environment variables.
# Indexing os.environ directly raises KeyError if a variable is missing.
spark_appid = os.environ['spark_appid']
spark_api_secret = os.environ['spark_api_secret']
spark_api_key = os.environ['spark_api_key']


# Build the chat-model client from the three credentials above.
llm = ChatSparkLLM(
    spark_app_id=spark_appid,
    spark_api_key=spark_api_key,
    spark_api_secret=spark_api_secret,
)

# Quick check: send a short greeting; as the last expression of the cell,
# the returned message is displayed by the notebook.
llm.invoke('你好')

AIMessage(content='你好！有什么我可以帮助你的吗？', response_metadata={'token_usage': {'question_tokens': 1, 'prompt_tokens': 1, 'completion_tokens': 8, 'total_tokens': 9}}, id='run-e5b5c1a6-3643-4d82-9b71-44907a6c8092-0')

In [4]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate

In [5]:
# Tell the model which fields the generated content must contain and what
# each field means. The response structure (JSON) has two fields:
# "answer" and "source".
answer_field = ResponseSchema(
    name="answer",
    description="answer to the user's question",
)
source_field = ResponseSchema(
    name="source",
    description="source referred to answer the user's question, should be a website.",
)
response_schemas = [answer_field, source_field]

# Initialise the parser from the schema list.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Get the parser's formatting instructions and append one extra constraint
# asking the model to return only the markdown snippet.
format_instructions = (
    output_parser.get_format_instructions()
    + '\n Only markdown content, no other else!'
)
format_instructions

'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string  // answer to the user\'s question\n\t"source": string  // source referred to answer the user\'s question, should be a website.\n}\n```\n Only markdown content, no other else!'

In [6]:
# Prompt text with two placeholders: {format_instructions} is pre-filled
# through partial_variables below, and {question} is supplied when the
# prompt is formatted.
template = """
answer the users question as best as possible.

{format_instructions}

% QUESTION:
{question}

"""


# Embed the format description in the prompt so the LLM is told what output
# format we expect.
# partial_variables lets the code pre-fill some of the template's variables;
# the original note likens this to the relationship between an interface and
# an abstract class.
prompt = PromptTemplate(
    template=template,
    input_variables=["question"],
    partial_variables={"format_instructions": format_instructions},
)

In [14]:
# Fill in the {question} placeholder; format_instructions was already bound
# as a partial variable when the prompt was built.
response = prompt.format_prompt(question="what's the capital of France?")

# Send the rendered prompt text to the model and print the returned message.
output = llm.invoke(response.to_string())
print(output)



content='```json\n{\n\t"answer": "Paris",\n\t"source": "https://en.wikipedia.org/wiki/Capital_of_France"\n}\n```' response_metadata={'token_usage': {'question_tokens': 120, 'prompt_tokens': 120, 'completion_tokens': 42, 'total_tokens': 162}} id='run-8f66c3b0-917a-470f-b6a9-631250815cfc-0'


In [22]:
# Show the type of the object returned by llm.invoke.
print(type(output))

# Feed the message text (the fenced json snippet) to the structured parser
# and print the parsed result.
ans = output_parser.parse(output.content)
print(ans)

<class 'langchain_core.messages.ai.AIMessage'>
```json
{
	"answer": "Paris",
	"source": "https://en.wikipedia.org/wiki/Capital_of_France"
}
```
{'answer': 'Paris', 'source': 'https://en.wikipedia.org/wiki/Capital_of_France'}
