In [1]:
from dotenv import load_dotenv

load_dotenv("../config/.env")

True

In [None]:
from typing import Optional
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model

class Person(BaseModel):
    """关于一个人的信息。"""
    name: Optional[str] = Field(
        default=None, description="这个人的名字"
    )
    hair_color: Optional[str] = Field(
        default=None, description="这个人的头发颜色，如果有的话"
    )
    height_in_meters: Optional[str] = Field(
        default=None, description="以米为单位的身高"
    )

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "你是一个专业的提取算法。"
            "只从文本中提取相关信息。"
            "如果你不知道所要求提取的属性的值，"
            "则返回该属性值为 null。”",
        ),
        ("human", "{text}"),
    ]
)

text = "Frank 身高 6 英尺，有着一头黑发。"
prompt = prompt_template.invoke({"text": text})

llm = init_chat_model("deepseek-chat", model_provider="deepseek")
structured_llm = llm.with_structured_output(schema=Person)

structured_llm.invoke(prompt)

Person(name='Frank', hair_color='黑发', height_in_meters=None)

In [6]:
from typing import List

class Data(BaseModel):
    people: List[Person]

multiple_entities_text = "我叫Frank，我的头发是黑色的，身高 6 英尺。Daneila的头发颜色和我一样。"
multiple_entities_prompt = prompt_template.invoke(multiple_entities_text)
multiple_entities_structured_llm = llm.with_structured_output(schema=Data)
multiple_entities_structured_llm.invoke(multiple_entities_prompt)

Data(people=[Person(name='Frank', hair_color='黑色', height_in_meters=None), Person(name='Daneila', hair_color='黑色', height_in_meters=None)])

In [8]:
messages = [
    {"role": "user", "content": "1 ~~ 1"},
    {"role": "assistant", "content": "2"},
    {"role": "user", "content": "2 ~~ 2"},
    {"role": "assistant", "content": "4"},
    {"role": "user", "content": "3 ~~ 3"},
]

response = llm.invoke(messages)
response.content

'9'

In [13]:
from langchain_core.utils.function_calling import tool_example_to_messages

examples = [
    (
        "海洋广阔而湛蓝，其深度超过 20,000 英尺。",
        Data(people=[]),
    ),
    (
        "Frank从法国长途跋涉到了西班牙。",
        Data(people=[Person(name="Frank", height_in_meters="175", hair_color=None)]),
    )
]

messages = []

for txt, tool_call in examples:
    print(tool_call)
    print(tool_call.people)
    if tool_call.people:
        ai_response = "Detected people."
    else:
        ai_response = "Detected no people."
    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))

for message in messages:
    message.pretty_print()

people=[]
[]
people=[Person(name='Frank', hair_color=None, height_in_meters='175')]
[Person(name='Frank', hair_color=None, height_in_meters='175')]

海洋广阔而湛蓝，其深度超过 20,000 英尺。
Tool Calls:
  Data (8f082030-0990-4047-8c10-a012d93f7692)
 Call ID: 8f082030-0990-4047-8c10-a012d93f7692
  Args:
    people: []

You have correctly called this tool.

Detected no people.

Frank从法国长途跋涉到了西班牙。
Tool Calls:
  Data (243b10ba-7cdd-4291-b90f-732d624ad2c1)
 Call ID: 243b10ba-7cdd-4291-b90f-732d624ad2c1
  Args:
    people: [{'name': 'Frank', 'hair_color': None, 'height_in_meters': '175'}]

You have correctly called this tool.

Detected people.


In [15]:
message_no_extraction = {
    "role": "user",
    "content": "太阳系很大，但地球只有 1 颗卫星。",
}

structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke([message_no_extraction])

Data(people=[])

In [16]:
structured_llm.invoke(messages + [message_no_extraction])

Data(people=[])