In [1]:
# 基本配置
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv

load_dotenv(override=True)

qw_llm_openai = ChatOpenAI(
    openai_api_base=os.getenv('DASHSCOPE_API_BASE'),
    openai_api_key=os.getenv('DASHSCOPE_API_KEY'),
    model_name="qwen2-1.5b-instruct",
    temperature=0,
    streaming=True,
)

ms_llm_openai = ChatOpenAI(
    openai_api_base=os.getenv('MOONSHOT_API_BASE'),
    openai_api_key=os.getenv('MOONSHOT_API_KEY'),
    model_name="moonshot-v1-8k",
    temperature=0,
    streaming=True,
)

cf_llm_openai = ChatOpenAI(
    openai_api_base=os.getenv('CF_API_BASE'),
    openai_api_key=os.getenv('CF_API_TOKEN'),
    model_name="@cf/meta/llama-3-8b-instruct",
    temperature=0,
    streaming=True,
)

groq_llm_openai = ChatOpenAI(
    openai_api_base=os.getenv('GROQ_API_BASE'),
    openai_api_key=os.getenv('GROQ_API_KEY'),
    model_name="llama3-8b-8192",
    temperature=0,
    streaming=True,
)

如果我们想要的模型返回Pydantic目的，我们只是需要通过在所需Pydantic类：

In [2]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10")


structured_llm = qw_llm_openai.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")

Joke(setup="Why don't cats make good dancers?", punchline='Because they already have four left feet.', rating=None)

我们还可以通过在一个"JSON Schema"结束的字典如果你不喜欢使用Pydantic. 在这种情况下，回应也是一个字典:

In [3]:
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
        },
    },
    "required": ["setup", "punchline"],
}
structured_llm = groq_llm_openai.with_structured_output(json_schema)

structured_llm.invoke("Tell me a joke about cats")

{'setup': 'Why did the cat join a band?',
 'punchline': 'Because it wanted to be the purr-cussionist!',
 'rating': 8}

Choosing between multiple schemas
选择之间的多种模式

最简单的方式，让该模型的选择从多个模式是创造一个父母Pydantic类有一个联盟类型的属性：

In [10]:
from typing import Union


class ConversationalResponse(BaseModel):
    """Respond in a conversational manner. Be kind and helpful."""

    response: str = Field(description="A conversational response to the user's query")


class Response(BaseModel):
    output: Union[Joke, ConversationalResponse]


structured_llm = qw_llm_openai.with_structured_output(Response)

structured_llm.invoke("Tell me a joke about cats")

Response(output=Joke(setup="Why don't cats make good dancers?", punchline='Because they always fall flat on their faces!', rating=None))

In [13]:
structured_llm.invoke("How are you today?")

ValidationError: 2 validation errors for Response
output
  value is not a valid dict (type=type_error.dict)
output
  value is not a valid dict (type=type_error.dict)

Streaming 流
We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a JSON Schema dict).
我们可以流产出我们的结构模型的输出类型是字典(即，当架构被指定为"}"结束词典).

In [14]:
structured_llm = qw_llm_openai.with_structured_output(json_schema)

for chunk in structured_llm.stream("Tell me a joke about cats"):
    print(chunk)

{}
{'setup': "Why don't"}
{'setup': "Why don't cats make good dancers?"}
{'setup': "Why don't cats make good dancers?", 'punchline': 'Because they already have four'}
{'setup': "Why don't cats make good dancers?", 'punchline': 'Because they already have four left feet!'}


Few-shot prompting 几拍提示
最简单和最普遍的方法是添加实例，以一个系统信息的提示：

In [15]:
from langchain_core.prompts import ChatPromptTemplate

system = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:

example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}"""

prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{input}")])

few_shot_structured_llm = prompt | structured_llm
few_shot_structured_llm.invoke("what's something funny about woodpeckers")

{'setup': 'Woodpecker', 'punchline': "They're known for their peeling nuts!"}

When the underlying method for structuring outputs is tool calling, we can pass in our examples as explicit tool calls. You can check if the model you're using makes use of tool calling in its API reference.
当基础的方法的结构化产出的工具，呼吁，我们可以通过在我们的例子作为明确的工具的电话。 你可以检查，如果该模型使用的使用的工具，呼吁在其API reference。

In [16]:
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

examples = [
    HumanMessage("Tell me a joke about planes", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Why don't planes ever get tired?",
                    "punchline": "Because they have rest wings!",
                    "rating": 2,
                },
                "id": "1",
            }
        ],
    ),
    # Most tool-calling models expect a ToolMessage(s) to follow an AIMessage with tool calls.
    ToolMessage("", tool_call_id="1"),
    # Some models also expect an AIMessage to follow any ToolMessages,
    # so you may need to add an AIMessage here.
    HumanMessage("Tell me another joke about planes", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Cargo",
                    "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!",
                    "rating": 10,
                },
                "id": "2",
            }
        ],
    ),
    ToolMessage("", tool_call_id="2"),
    HumanMessage("Now about caterpillars", name="example_user"),
    AIMessage(
        "",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Caterpillar",
                    "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!",
                    "rating": 5,
                },
                "id": "3",
            }
        ],
    ),
    ToolMessage("", tool_call_id="3"),
]
system = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") \
and the final punchline (the response to "<setup> who?")."""

# placeholder : 一个可用于传入消息列表的占位符。
prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("placeholder", "{examples}"), ("human", "{input}")]
)
few_shot_structured_llm = prompt | structured_llm
few_shot_structured_llm.invoke({"input": "crocodiles", "examples": examples})

BadRequestError: Error code: 400 - {'error': {'code': 'invalid_parameter_error', 'param': None, 'message': 'field required: payload.input.messages.2.content & field required: payload.input.messages.5.content & field required: payload.input.messages.8.content', 'type': 'invalid_request_error'}, 'id': 'chatcmpl-5ae7322b-f6b8-98a6-9e66-cce34de896f6'}

(Advanced) Specifying the method for structuring outputs
(高级)指定方法的结构化产出

对于模型的支持，多个装置的构造的产出(即，他们支持这两个工具叫。模式)，可以指定方法的使用 method= 参数。

In [17]:
# 如果使用JSON模式你必须仍然指定所需的方案模型的提示。 
# 该架构你过来 with_structured_output 将只能用于分析的模型输出，它将不会被传递给模型的方式是与工具的呼吁。
structured_llm = qw_llm_openai.with_structured_output(Joke, method="json_mode")

structured_llm.invoke(
    "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
)

Joke(setup="Why don't cats make good dancers?", punchline='Because they always land on their feet.', rating=None)

(Advanced) Raw outputs (高级)原产出

你可以避免的提高和例外处理的原输出自己通过 include_raw=True . 这种变化的输出格式，以包含原始信息输出， parsed 值(如果成功的话)，以及由此产生的任何错误：

In [18]:
structured_llm = qw_llm_openai.with_structured_output(Joke, include_raw=True)

structured_llm.invoke(
    "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
)

{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_961db6d06251427596eaab', 'function': {'arguments': '{"setup": "Why don\'t cats make good dancers?", "punchline": "Because they always fall over!"}', 'name': 'Joke'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'stop', 'model_name': 'qwen2-1.5b-instruct'}, id='run-167889f7-febe-472d-a8bf-3b69dea553b1-0', tool_calls=[{'name': 'Joke', 'args': {'setup': "Why don't cats make good dancers?", 'punchline': 'Because they always fall over!'}, 'id': 'call_961db6d06251427596eaab', 'type': 'tool_call'}]),
 'parsed': Joke(setup="Why don't cats make good dancers?", punchline='Because they always fall over!', rating=None),
 'parsing_error': None}

Prompting and parsing model outputs directly
提示和分析模型产出的直接

不是所有的模型支持 .with_structured_output() ，由于并非所有模型有的工具，叫函模式的支持。 
对于这样的模式只需要直接提示模型使用一种特定的格式，并使用一个输出分析器来提取的结构化反应的原始模型的输出。

Using PydanticOutputParser 使用 PydanticOutputParser ​

In [19]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    people: List[Person]


# Set up a parser
parser = PydanticOutputParser(pydantic_object=People)

# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Wrap the output in `json` tags\n{format_instructions}",
        ),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

In [20]:
query = "Anna is 23 years old and she is 6 feet tall"

print(prompt.invoke(query).to_string())

System: Answer the user query. Wrap the output in `json` tags
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"description": "Identifying information about all people in a text.", "properties": {"people": {"title": "People", "type": "array", "items": {"$ref": "#/definitions/Person"}}}, "required": ["people"], "definitions": {"Person": {"title": "Person", "description": "Information about a person.", "type": "object", "properties": {"name": {"title": "Name", "description": "The name of the person", "type": "string"}, "height_in_meters": {"title": "Height In Meters", "description": "The heig

In [21]:
chain = prompt | qw_llm_openai | parser

chain.invoke({"query": query})

People(people=[Person(name='Anna', height_in_meters=1.875)])

Custom Parsing 定义的分析

你还可以创建一个自定义的提示和分析器有LangChain表达的语言(LCEL)，使用普通功能分析的输出模型：

In [22]:
import json
import re
from typing import List

from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    people: List[Person]


# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that  "
            "matches the given schema: ```json\n{schema}\n```. "
            "Make sure to wrap the answer in ```json and ``` tags",
        ),
        ("human", "{query}"),
    ]
).partial(schema=People.schema())


# Custom parser
def extract_json(message: AIMessage) -> List[dict]:
    """Extracts JSON content from a string where JSON is embedded between ```json and ``` tags.

    Parameters:
        text (str): The text containing the JSON content.

    Returns:
        list: A list of extracted JSON strings.
    """
    text = message.content
    # Define the regular expression pattern to match JSON blocks
    pattern = r"```json(.*?)```"

    # Find all non-overlapping matches of the pattern in the string
    matches = re.findall(pattern, text, re.DOTALL)

    # Return the list of matched JSON strings, stripping any leading or trailing whitespace
    try:
        return [json.loads(match.strip()) for match in matches]
    except Exception:
        raise ValueError(f"Failed to parse: {message}")

In [23]:
query = "Anna is 23 years old and she is 6 feet tall"

print(prompt.format_prompt(query=query).to_string())

System: Answer the user query. Output your answer as JSON that  matches the given schema: ```json
{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}
```. Make sure to wrap the answer in ```json and ``` tags
Human: Anna is 23 years old and she is 6 feet tall


In [24]:
chain = prompt | qw_llm_openai | extract_json

chain.invoke({"query": query})

[]