In [1]:
%load_ext autoreload
%autoreload 2

# How to return structured data from a model
_Reference_: [https://python.langchain.com/docs/how_to/structured_output/](https://python.langchain.com/docs/how_to/structured_output/)

In [2]:
def load_env_to_dict(file_path):
    """Parse a ``KEY=VALUE`` style env file into a dictionary.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Every other line is split on its first ``=``; the key and the
    value are both stripped of surrounding whitespace. If a key appears more
    than once, the last occurrence wins.

    Args:
        file_path: Path of the env file to read.

    Returns:
        dict: Mapping of variable name to its (string) value.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank, non-comment line contains no ``=``.
    """
    env_dict = {}
    # Explicit encoding so parsing does not depend on the platform's locale.
    with open(file_path, "r", encoding="utf-8") as file:
        for line_number, raw_line in enumerate(file, start=1):
            # Remove whitespace and ignore comments or empty lines
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            # Split on the first "=" only, so values may themselves contain "="
            key, separator, value = line.partition("=")
            if not separator:
                # The offending text is deliberately not echoed: env files
                # usually hold secrets.
                raise ValueError(
                    f"{file_path}: line {line_number} is not in KEY=VALUE form"
                )
            env_dict[key.strip()] = value.strip()
    return env_dict

In [3]:
import os

# Location of the KEY=VALUE env file holding the API keys. Set the
# AI_PLATFORM_ENV_FILE environment variable to run this notebook on another
# machine; when it is unset the original local path is used.
file_path = os.environ.get(
    "AI_PLATFORM_ENV_FILE",
    "/mnt/Exdisk/git-cuongpiger/secret/work/vngcloud/ai-platform/env",
)
# Parsed key/value pairs; a later cell reads "HUGGINGFACE_API_KEY" from this dict.
env_variables = load_env_to_dict(file_path)

In [4]:
import os

# Seed the Hugging Face token from the parsed env file, but only when the
# process environment does not already carry a non-empty one.
if os.environ.get("HUGGINGFACEHUB_API_TOKEN", "") == "":
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = env_variables["HUGGINGFACE_API_KEY"]

# Always (re)set the credentials path, replacing any value already present.
os.environ.update(
    GOOGLE_APPLICATION_CREDENTIALS="/mnt/Exdisk/git-cuongpiger/secret/work/vngcloud/ai-platform/vertex-ai-credential.json"
)

In [5]:
from langchain_google_vertexai import ChatVertexAI

In [6]:
# Chat model shared by every example below.
# NOTE(review): presumably authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS path set in the previous cell — confirm.
llm = ChatVertexAI(model="gemini-1.5-flash")

# Pydantic class

In [7]:
from typing import Optional

from pydantic import BaseModel, Field

In [8]:
# Pydantic schema passed to `llm.with_structured_output(...)` in the next cell.
# NOTE: per the LangChain structured-output guide, the class name, the
# docstring and the field descriptions are given to the model, so they are
# part of the prompt and are kept verbatim.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Optional field: stays None when no rating is supplied.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )

In [9]:
# Wrap the chat model so that its answer comes back as a `Joke` instance
# (see the recorded output of the next cell).
structured_llm = llm.with_structured_output(Joke)

In [10]:
# Vietnamese prompt: "please tell me a funny story about an old man".
# The cell's value is the parsed Joke object; `rating` is None in the recorded run.
structured_llm.invoke("hãy kể cho tôi một câu chuyện cười về ông già")

Joke(setup='Tại sao ông già lại là thám tử giỏi nhất?', punchline='Bởi vì nó có thể nhớ được mọi thứ!', rating=None)

# TypedDict or JSON Schema

In [11]:
from typing import Optional
from typing_extensions import Annotated, TypedDict

In [12]:
# TypedDict variant of the Joke schema.
# NOTE(review): this rebinds the name `Joke` (previously the Pydantic class),
# and no visible cell passes this TypedDict to `with_structured_output` — the
# next call uses `json_schema`. Confirm whether a cell is missing.
class Joke(TypedDict):
    """Joke to tell user."""

    # Annotated[<type>, <default>, <description>]; `...` means "no default".
    setup: Annotated[str, ..., "The setup of the joke"]

    # Alternatively, we could have specified setup as:

    # setup: str                    # no default, no description
    # setup: Annotated[str, ...]    # no default, no description
    # setup: Annotated[str, "foo"]  # default, no description

    punchline: Annotated[str, ..., "The punchline of the joke"]
    # Optional rating whose default is None.
    rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]

In [13]:
# The Joke schema expressed as a plain JSON Schema dictionary.
json_schema = {
    # Schema-level metadata.
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    # One entry per field of the joke.
    "properties": {
        "setup": {"type": "string", "description": "The setup of the joke"},
        "punchline": {"type": "string", "description": "The punchline to the joke"},
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    # `rating` is intentionally absent here, which makes it optional.
    "required": ["setup", "punchline"],
}

In [15]:
# Same wrapper as before, but driven by the raw JSON Schema dict; the recorded
# output shows the result comes back as a plain dict rather than an object.
structured_llm = llm.with_structured_output(json_schema)
# Vietnamese prompt: "Tell me a funny story about an old man".
# NOTE(review): the recorded run returned rating 7.0 (a float) although the
# schema declares "integer" — validate/convert downstream if the type matters.
structured_llm.invoke("Kể tôi một câu chuyện cười về ông già")

{'rating': 7.0,
 'punchline': 'Vì họ đã sống lâu và học được rất nhiều điều!',
 'setup': 'Tại sao người già luôn kể chuyện cười?'}

# Choosing between multiple schemas

## Using Pydantic

In [16]:
from typing import Union

In [18]:
# Pydantic Joke schema again: the name `Joke` was rebound to a TypedDict in an
# earlier cell, so it is redefined here before being used in the Union below.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Optional field: stays None when no rating is supplied.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Alternative output schema for replies that are not jokes: a single free-text
# `response` field. The docstring below is schema text and is left untouched.
class ConversationalResponse(BaseModel):
    """Respond in a conversational manner. Be kind and helpful."""

    response: str = Field(description="A conversational response to the user's query")


# Wrapper schema whose only field is either a Joke or a ConversationalResponse.
# Documented with `#` comments on purpose: a docstring would presumably be
# exported as the schema description and so change what the model sees.
class FinalResponse(BaseModel):
    final_output: Union[Joke, ConversationalResponse]

In [20]:
# Build a runnable that should return a FinalResponse. The recorded run emits
# the warning "Key '$defs' is not supported in schema, ignoring" here.
# NOTE(review): the nested Joke / ConversationalResponse definitions presumably
# live under `$defs`, so the Union may not reach the model intact — invoke the
# runnable to verify before relying on it.
structured_llm = llm.with_structured_output(FinalResponse)

Key '$defs' is not supported in schema, ignoring


## Using TypedDict

In [21]:
from typing import Optional, Union

from typing_extensions import Annotated, TypedDict

In [22]:
# TypedDict version of the Joke schema (rebinds `Joke` once more).
# Each field is Annotated[<type>, <default>, <description>]; `...` = no default.
class Joke(TypedDict):
    """Joke to tell user."""

    setup: Annotated[str, ..., "The setup of the joke"]
    punchline: Annotated[str, ..., "The punchline of the joke"]
    # Optional rating whose default is None.
    rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]


# TypedDict version of the non-joke reply schema: one required `response` string.
class ConversationalResponse(TypedDict):
    """Respond in a conversational manner. Be kind and helpful."""

    response: Annotated[str, ..., "A conversational response to the user's query"]


# TypedDict wrapper whose only key holds either a Joke or a
# ConversationalResponse dict.
# NOTE(review): in the recorded run, passing this class to
# `llm.with_structured_output` raised ValueError (see the next cell's output).
class FinalResponse(TypedDict):
    final_output: Union[Joke, ConversationalResponse]

In [23]:
# With ChatVertexAI the TypedDict-based Union schema is rejected: the recorded
# run raised "ValueError: no signature found for builtin type <class 'dict'>".
# The failure is the point of this cell, so it is caught and displayed instead
# of aborting a Restart & Run All. On failure `structured_llm` keeps whatever
# value it had before this cell, exactly as when the exception was uncaught.
try:
    structured_llm = llm.with_structured_output(FinalResponse)
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: no signature found for builtin type <class 'dict'>

# Streaming

In [24]:
# Pydantic Joke schema, redefined because the previous section rebound `Joke`
# to a TypedDict. Used for the streaming and few-shot examples that follow.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Optional field: stays None when no rating is supplied.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )

In [25]:
# Rebuild the structured runnable on the Pydantic Joke; this is the
# `structured_llm` used by the streaming and few-shot cells below.
structured_llm = llm.with_structured_output(Joke)

In [27]:
# Stream the structured output and print every chunk as it is yielded.
# NOTE(review): the recorded run printed a single chunk whose setup and
# punchline are both the truncated text 'Why don' — check whether streaming
# yields usable partial objects with this model.
for chunk in structured_llm.stream("Tell me a joke about cats"):
    print(chunk)

setup='Why don' punchline='Why don' rating=7


# Few-shot prompting

In [29]:
from langchain_core.prompts import ChatPromptTemplate

In [30]:
# System prompt for the first few-shot variant: the examples are written inline
# as text. The JSON braces are doubled ({{ }}) so that the prompt template
# treats them as literal braces rather than as input variables.
# NOTE(review): the first example's setup is a full question, not a
# knock-knock style setup as the instructions describe — confirm it is intended.
system = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:

example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}"""

In [32]:
# Two-message template: the system text above, then the user's request filled
# in through the single `{input}` variable.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{input}")])

In [33]:
# Pipe the rendered prompt into the structured model, so invoking the chain
# formats the messages first and then returns the structured result.
few_shot_structured_llm = prompt | structured_llm

In [34]:
# The chain is invoked with a bare string instead of {"input": ...}; the
# recorded output shows this works for this single-variable prompt.
few_shot_structured_llm.invoke("what's something funny about woodpeckers")

Joke(setup='Woodpecker', punchline='Woodpeckers really get hammered!', rating=7)

In [35]:
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

In [36]:
# Few-shot examples expressed as real chat messages instead of prompt text.
# Each example is a triple:
#   1. HumanMessage  - the example request,
#   2. AIMessage     - empty content plus one tool call carrying the joke fields,
#   3. ToolMessage   - empty result answering that tool call (matched by id).
# All example turns carry the same `name` tags (`example_user` /
# `example_assistant`); the third assistant turn previously lacked its tag.
# NOTE(review): the tool calls are named "joke" while the schema class used to
# build `structured_llm` in the visible cells is `Joke` — confirm the name the
# model expects.
examples = [
    HumanMessage("Tell me a joke about planes", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Why don't planes ever get tired?",
                    "punchline": "Because they have rest wings!",
                    "rating": 2,
                },
                "id": "1",
            }
        ],
    ),
    # Most tool-calling models expect a ToolMessage(s) to follow an AIMessage with tool calls.
    ToolMessage("", tool_call_id="1"),
    # Some models also expect an AIMessage to follow any ToolMessages,
    # so you may need to add an AIMessage here.
    HumanMessage("Tell me another joke about planes", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Cargo",
                    "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!",
                    "rating": 10,
                },
                "id": "2",
            }
        ],
    ),
    ToolMessage("", tool_call_id="2"),
    HumanMessage("Now about caterpillars", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Caterpillar",
                    "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!",
                    "rating": 5,
                },
                "id": "3",
            }
        ],
    ),
    ToolMessage("", tool_call_id="3"),
]

In [37]:
# System prompt for the message-based few-shot variant: instructions only, the
# examples are supplied separately as chat messages. The trailing backslashes
# join the three source lines into a single line of prompt text.
system = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") \
and the final punchline (the response to "<setup> who?")."""

In [38]:
# Template with three parts: the system instructions, a placeholder that is
# filled with the list passed as `examples`, and the user's `{input}`.
prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("placeholder", "{examples}"), ("human", "{input}")]
)

In [39]:
# Rebuild the chain so it uses the placeholder-based prompt defined just above.
few_shot_structured_llm = prompt | structured_llm

In [40]:
# Supply both template variables: the topic as `input` and the example
# messages as `examples`. The recorded output is a parsed Joke object.
few_shot_structured_llm.invoke({"input": "crocodiles", "examples": examples})

Joke(setup='Why do crocodiles have such good lawyers?', punchline="They're always running around saying, 'See ya later, alligator!'", rating=8)