In [None]:
# 05 输出解析器
# LLM的输出为文本，但在程序中除了显示文本，可能希望获得更结构化的数据。这就是输出解析器（Output Parsers）的用武之地。

In [1]:
from langchain.output_parsers import CommaSeparatedListOutputParser

# List Parser将逗号分隔的文本解析为列表。
output_parser = CommaSeparatedListOutputParser()
output_parser.parse("black, yellow, red, green, white, blue")

['black', 'yellow', 'red', 'green', 'white', 'blue']

In [ ]:
# Structured Output Parser
# 当我们想要类似JSON数据结构，包含多个字段时，可以使用这个输出解析器。
# 该解析器可以生成指令帮助LLM返回结构化数据文本，同时完成文本到结构化数据的解析工作。

In [3]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI

# 定义响应的结构(JSON)，两个字段 answer和source。
response_schemas = [
    ResponseSchema(name="answer", description="对用户问题的回答"),
    ResponseSchema(name="source", description="用于回答用户问题所引用的来源，应该是一个网站。")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# 获取响应格式化的指令
format_instructions = output_parser.get_format_instructions()

format_instructions

'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string  // 对用户问题的回答\n\t"source": string  // 用于回答用户问题所引用的来源，应该是一个网站。\n}\n```'

In [4]:
import os
from langchain_community.llms.cloudflare_workersai import CloudflareWorkersAI
from langchain_community.llms.tongyi import Tongyi
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

load_dotenv(override=True)

account_id = os.getenv('CF_ACCOUNT_ID')
api_token = os.getenv('CF_API_TOKEN')
print(account_id)
print(api_token)

# CloudflareWorkersAI
model = '@cf/meta/llama-3-8b-instruct'
cf_llm = CloudflareWorkersAI(
    account_id=account_id,
    api_token=api_token,
    model=model
)

DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY')
print(DASHSCOPE_API_KEY)

# qwen
qw_llm = Tongyi(
    model='qwen2-1.5b-instruct'
)

# qwen 兼容 openai的接口
qw_llm_openai = ChatOpenAI(
    openai_api_base='https://dashscope.aliyuncs.com/compatible-mode/v1',
    openai_api_key=DASHSCOPE_API_KEY,
    model_name="qwen2-1.5b-instruct",
    temperature=0.7,
    streaming=True,
)

api_key = os.getenv('OPENAI_API_KEY')
base_url = os.getenv('OPENAI_API_BASE')
print(api_key)
print(base_url)

# openai/moonshot
ms_llm = ChatOpenAI(
    openai_api_base=base_url,
    openai_api_key=api_key,
    model_name="moonshot-v1-8k",
    temperature=0.7,
)

8483c3ec7a0cbc54a8d660b5b9002b04
Gcllof8ze6dgtcqFI5FQZ2SD_5tfCD4Db7NuS6jn
sk-01c5003340c3453b934052d737d45e01
sk-UGVpjuTwo2Q8pewoqUDfckw1A0pbSDli9ElFMeS9WareKknG
https://api.moonshot.cn/v1/


In [10]:
from langchain_core.output_parsers import StrOutputParser

# partial_variables允许在代码中预填充提示此模版的部分变量。这类似于接口，抽象类之间的关系
prompt = PromptTemplate(
    template="answer the users question as best as possible.\n{format_instructions}\n{question}",
    input_variables=["question"],
    partial_variables={"format_instructions": format_instructions}
)

response = prompt.format_prompt(question="Who is the CEO of Tesla?")
print(response)
output = qw_llm_openai.invoke(response.to_string())
# output = response.to_string() | qw_llm_openai | StrOutputParser()
print(output)

text='answer the users question as best as possible.\nThe output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string  // 对用户问题的回答\n\t"source": string  // 用于回答用户问题所引用的来源，应该是一个网站。\n}\n```\nWho is the CEO of Tesla?'


TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'str'>

In [8]:
output_parser.parse(output)

text='answer the users question as best as possible.\nThe output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string  // 对用户问题的回答\n\t"source": string  // 用于回答用户问题所引用的来源，应该是一个网站。\n}\n```\nWho is the CEO of Tesla?'
content='```json\n{\n\t"answer": "埃隆·马斯克是特斯拉公司的CEO。",\n\t"source": "https://www.businessinsider.com/elon-musk-is-the-ceo-of-tesla-2019-5"\n}\n```' response_metadata={'finish_reason': 'stop'} id='run-288ee521-ebe5-456b-907a-f1679fdf23a9-0'


AttributeError: 'AIMessage' object has no attribute 'strip'

In [11]:
# 自定义输出解析器
# 扩展CommaSeparatedListOutputParser，让其返回的列表是经过排序的。
from typing import List


class SortedCommaSeparatedListOutputParser(CommaSeparatedListOutputParser):
    def parse(self, text: str) -> List[str]:
        lst = super().parse(text)
        return sorted(lst)


output_parser = SortedCommaSeparatedListOutputParser()
output_parser.parse("black, yellow, red, green, white, blue")

['black', 'blue', 'green', 'red', 'white', 'yellow']