In [1]:
from dotenv import load_dotenv, find_dotenv

# read local .env file
# find_dotenv is typically used to search for and locate a .env file in the directory tree, while load_dotenv loads the variables from that .env file into the environment.
_ = load_dotenv(find_dotenv())

In [2]:
from langchain_openai import ChatOpenAI

# Chat model instance shared by the examples in the cells below.
llm = ChatOpenAI(
    temperature=0.3,
    model_name="gpt-3.5-turbo",
)

In [3]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate 

In [4]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, field_validator
from typing import List
 

In [5]:
# Define your desired data structure.
class Joke(BaseModel):
    # Target structure for a two-part joke. Documented with comments rather
    # than a class docstring: a model docstring would presumably be emitted in
    # the JSON schema that is injected into the prompt -- verify before adding one.
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @field_validator("setup")
    @classmethod
    def question_ends_with_question_mark(cls, field):
        """Reject a setup that does not end with a question mark.

        Args:
            field: The candidate ``setup`` string.

        Returns:
            The unchanged ``setup`` value when it ends with "?".

        Raises:
            ValueError: If the value is empty or does not end with "?".
        """
        # endswith() is safe on an empty string, whereas indexing field[-1]
        # raises IndexError instead of the intended ValueError.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field
 
# A query intended to prompt the language model to populate the data structure.
joke_query = "Tell me a joke."

# Build the parser first; its format instructions are baked into the prompt.
parser = PydanticOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Render the prompt to plain text and send it to the chat model.
_input = prompt.format_prompt(query=joke_query)
output = llm.invoke(_input.to_string())

# Display the raw model reply.
output
 

AIMessage(content='{\n  "setup": "Why don\'t scientists trust atoms?",\n  "punchline": "Because they make up everything!"\n}')

In [None]:
""" output
AIMessage(content='{\n  "setup": "Why don\'t scientists trust atoms?",\n  "punchline": "Because they make up everything!"\n}')
"""

In [6]:
# Parse the text of the model's reply (output.content) into a Joke instance.
parser.parse(output.content)

Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything!')

In [None]:
""" output
Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything!')
"""

In [7]:
# Here's another example, but with a compound typed field.
class Actor(BaseModel):
    # An actor's name plus the list of films they starred in.
    name: str = Field(
        description="name of an actor",
    )
    film_names: List[str] = Field(
        description="list of names of films they starred in",
    )
 
# Query asking the model to fill in the Actor structure.
actor_query = "Generate the filmography for a random actor."

# Parser bound to the Actor model; it supplies the format instructions below.
parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Show the instructions text that gets injected into the prompt.
parser.get_format_instructions()
 

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"description": "name of an actor", "title": "Name", "type": "string"}, "film_names": {"description": "list of names of films they starred in", "items": {"type": "string"}, "title": "Film Names", "type": "array"}}, "required": ["name", "film_names"]}\n```'

In [None]:
""" output
'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"description": "name of an actor", "title": "Name", "type": "string"}, "film_names": {"description": "list of names of films they starred in", "items": {"type": "string"}, "title": "Film Names", "type": "array"}}, "required": ["name", "film_names"]}\n```'
"""

In [8]:
# Fill the prompt template with the actor query.
_input = prompt.format_prompt(query=actor_query)
 
# Send the prompt to the chat model as messages, then display the raw reply.
output = llm.invoke(_input.to_messages())
output

AIMessage(content='{\n  "name": "Tom Hanks",\n  "film_names": [\n    "Forrest Gump",\n    "Cast Away",\n    "Saving Private Ryan",\n    "Toy Story",\n    "The Green Mile"\n  ]\n}')

In [None]:
""" output
AIMessage(content='{\n  "name": "Tom Hanks",\n  "film_names": [\n    "Forrest Gump",\n    "Cast Away",\n    "Saving Private Ryan",\n    "Toy Story",\n    "The Green Mile"\n  ]\n}')
"""

In [9]:
# Parse the text of the model's reply (output.content) into an Actor instance.
parser.parse(output.content)

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile'])

In [None]:
""" output
Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile'])
"""

In [10]:
# Define your desired data structure.
class Joke(BaseModel):
    # Target structure for a two-part joke. Documented with comments rather
    # than a class docstring: a model docstring would presumably be emitted in
    # the JSON schema that is injected into the prompt -- verify before adding one.
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @field_validator("setup")
    @classmethod
    def question_ends_with_question_mark(cls, field):
        """Reject a setup that does not end with a question mark.

        Args:
            field: The candidate ``setup`` string.

        Returns:
            The unchanged ``setup`` value when it ends with "?".

        Raises:
            ValueError: If the value is empty or does not end with "?".
        """
        # endswith() is safe on an empty string, whereas indexing field[-1]
        # raises IndexError instead of the intended ValueError.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field


# A query intended to prompt the language model to populate the data structure.
joke_query = "Tell me a joke."

# Build the parser first; its format instructions are baked into the prompt.
parser = PydanticOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Compose prompt -> model -> parser into a single runnable pipeline.
chain = prompt | llm | parser

# Run the whole pipeline for the joke query and display the parsed result.
chain.invoke({"query": joke_query})

Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything!')

In [None]:
""" output
Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything!')
"""

In [11]:
# Here's another example, but with a compound typed field.
class Actor(BaseModel):
    # An actor's name plus the list of films they starred in.
    name: str = Field(
        description="name of an actor",
    )
    film_names: List[str] = Field(
        description="list of names of films they starred in",
    )


# Query asking the model to fill in the Actor structure.
actor_query = "Generate the filmography for a random actor."

# Parser bound to the Actor model; it supplies the format instructions below.
parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Compose prompt -> model -> parser into a single runnable pipeline.
chain = prompt | llm | parser

# Run the whole pipeline for the actor query and display the parsed result.
chain.invoke({"query": actor_query})

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'Apollo 13'])

In [None]:
""" output
Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'Apollo 13'])
"""