In [2]:
from pydantic import BaseModel, Field
from typing import List, Optional

In [3]:
class Product(BaseModel):
    # Schema for a single product extracted from free text.
    # NOTE: kept as comments rather than a class docstring so the JSON schema
    # generated from this model (and therefore the prompt) stays unchanged.

    # Required: product name.
    name: str = Field(
        description="Name of the product",
    )
    # Required: category label.
    category: str = Field(
        description="Category the product belongs to",
    )
    # Required: numeric price (no currency information is stored).
    price: float = Field(
        description="Price of the product",
    )
    # Optional: defaults to None when no description is supplied.
    description: Optional[str] = Field(
        default=None,
        description="A brief description of the product",
    )
    # Required: list of tag strings.
    tags: List[str] = Field(
        description="A list of relevant tags for the product",
    )

class ProductList(BaseModel):
    # Top-level container: the parser is given this model, so the LLM's reply
    # is expected to be one object holding every extracted product.
    products: List[Product] = Field(
        description="A list of products",
    )

In [5]:
from langchain_litellm import ChatLiteLLM

# Base URL of the DashScope OpenAI-compatible endpoint.
# This constant used to be assigned but never used; it is now passed to the
# client explicitly so the endpoint the notebook talks to is visible and can be
# changed in one place.
DASHSCOPE_API_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"

# Low temperature keeps the structured-extraction output close to deterministic.
# NOTE(review): no API key is passed here; presumably it is read from the
# environment by LiteLLM -- confirm it is set before running.
llm = ChatLiteLLM(
    model="dashscope/qwen-plus",
    temperature=0.1,
    api_base=DASHSCOPE_API_BASE,
)

In [6]:
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain.schema import OutputParserException

In [8]:
# Base parser: turns the model's reply into a ProductList instance.
parser = PydanticOutputParser(pydantic_object=ProductList)

# Safety net for replies that do not follow the schema exactly: this wrapper
# uses the LLM itself to fix parsing errors raised by the base parser.
fixing_parser = OutputFixingParser.from_llm(
    llm=llm,
    parser=parser,
)

In [10]:
# Define the prompt template.
# It has two placeholders that are supplied when the chain is invoked:
#   format_instructions - the instructions string obtained from the parser
#   text                - the raw text to extract products from
prompt_template = """
You are a helpful assistant that extracts product information from text.
Please extract the following product details and format them as JSON according to the Pydantic schema provided below.

Pydantic Schema:
{format_instructions}

Text to extract from:
{text}
"""

# Build the chat prompt object from the template string above.
prompt = ChatPromptTemplate.from_template(template=prompt_template)

In [12]:
# Format instructions for the LLM: ask the parser for the text describing the
# expected JSON output. It is passed as the "format_instructions" input each
# time the chain is invoked.
format_instructions = parser.get_format_instructions()

In [13]:
# Display the generated instructions to inspect what the LLM will be told.
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"$defs": {"Product": {"properties": {"name": {"description": "Name of the product", "title": "Name", "type": "string"}, "category": {"description": "Category the product belongs to", "title": "Category", "type": "string"}, "price": {"description": "Price of the product", "title": "Price", "type": "number"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "A brief description of the product", "title": "Description"}, "tags": {"description": "A list of relevant tags for the product", "ite

In [14]:
# The text containing product information (English sample).
# It describes two products, each with a name, category, price, a short
# description and a "Tags:" list, matching the fields of the Product model.
text_to_analyze = """
We have a new gadget called the "Quantum Widget". It's a cutting-edge device in the electronics category,
priced at $199.99. This widget revolutionizes home automation. Tags: smart, home, tech, innovation.

Also, check out our "Eco-Friendly Water Bottle". It's in the outdoor gear category, costs $25.00,
and is made from recycled materials, perfect for hiking and daily use. Tags: eco, outdoor, hydration, recycled.
"""

In [15]:
# Create the chain: the filled-in prompt goes to the LLM, and the LLM's reply
# is handed to the fixing parser.
chain = prompt | llm | fixing_parser

In [16]:
# Invoke the chain on the English sample text.
try:
    output = chain.invoke({
        "text": text_to_analyze,
        "format_instructions": format_instructions
    })
except OutputParserException as e:
    print(f"Failed to parse output: {e}")
    # Later cells read `output`. Swallowing the error here would leave it
    # undefined (fresh kernel) or stale (re-run), so the failure would surface
    # later as a confusing NameError or as silently wrong results. Re-raise so
    # execution stops at the cell that actually failed.
    raise
else:
    # Only show the result when parsing succeeded.
    print(output)

products=[Product(name='Quantum Widget', category='electronics', price=199.99, description='This widget revolutionizes home automation.', tags=['smart', 'home', 'tech', 'innovation']), Product(name='Eco-Friendly Water Bottle', category='outdoor gear', price=25.0, description='It is made from recycled materials, perfect for hiking and daily use.', tags=['eco', 'outdoor', 'hydration', 'recycled'])]


In [17]:
# Walk the parsed products and print one labelled line per field,
# followed by a blank line after each product.
for product in output.products:
    print(
        f"Product Name: {product.name}",
        f"Category: {product.category}",
        f"Price: ${product.price:.2f}",
        # A missing or empty description is shown as N/A.
        f"Description: {product.description or 'N/A'}",
        f"Tags: {', '.join(product.tags)}\n",
        sep="\n",
    )

Product Name: Quantum Widget
Category: electronics
Price: $199.99
Description: This widget revolutionizes home automation.
Tags: smart, home, tech, innovation

Product Name: Eco-Friendly Water Bottle
Category: outdoor gear
Price: $25.00
Description: It is made from recycled materials, perfect for hiking and daily use.
Tags: eco, outdoor, hydration, recycled



In [18]:
# Chinese version of the sample text, used to check that the same chain
# extracts products from non-English input.
text_to_analyze_zh = """
我们推出了一款名为“量子智能小工具”（Quantum Widget）的全新智能设备。这是一款尖端电子产品，

售价 199.99 美元。这款小工具将彻底改变智能家居体验。标签：智能、家居、科技、创新。

此外，也来看看我们的“环保水瓶”。它属于户外装备类别，售价 25.00 美元，

采用再生材料制成，非常适合徒步旅行和日常使用。标签：环保、户外、补水、再生材料。
"""
try:
    output = chain.invoke({
        "text": text_to_analyze_zh,
        "format_instructions": format_instructions
    })
except OutputParserException as e:
    print(f"Failed to parse output: {e}")
else:
    # Report results only when this invocation succeeded. Previously the loop
    # ran unconditionally, so after a parse failure it printed whatever
    # `output` still held from an earlier run as if it came from this text.
    print(output)

    for product in output.products:
        print(f"Product Name: {product.name}")
        print(f"Category: {product.category}")
        print(f"Price: ${product.price:.2f}")
        print(f"Description: {product.description if product.description else 'N/A'}")
        print(f"Tags: {', '.join(product.tags)}\n")

products=[Product(name='量子智能小工具', category='电子产品', price=199.99, description='这款小工具将彻底改变智能家居体验。', tags=['智能', '家居', '科技', '创新']), Product(name='环保水瓶', category='户外装备', price=25.0, description='采用再生材料制成，非常适合徒步旅行和日常使用。', tags=['环保', '户外', '补水', '再生材料'])]
Product Name: 量子智能小工具
Category: 电子产品
Price: $199.99
Description: 这款小工具将彻底改变智能家居体验。
Tags: 智能, 家居, 科技, 创新

Product Name: 环保水瓶
Category: 户外装备
Price: $25.00
Description: 采用再生材料制成，非常适合徒步旅行和日常使用。
Tags: 环保, 户外, 补水, 再生材料

