In [None]:
import os
from uuid import uuid4

unique_id = uuid4().hex[0:8]
os.environ["LANGSMITH_TRACING_V2"] = "true"
os.environ["LANGSMITH_PROJECT"] = f"classification - {unique_id}"



In [8]:
from typing import List, Optional

from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    """
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )
    """

class Data(BaseModel):
    """Extracted data about people."""

    # Creates a model so that we can extract multiple entities.
    people: List[Person]

In [4]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

In [10]:
from langchain.chat_models import init_chat_model

llm = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

structured_llm = llm.with_structured_output(schema=Data)
text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
prompt = prompt_template.invoke({"text": text})
structured_llm.invoke(prompt)

Key '$defs' is not supported in schema, ignoring


ParseError: Failed to parse properties field: Failed to parse items field: Failed to parse properties field: Failed to parse anyOf field: Failed to parse type field: Invalid enum value string for enum type google.cloud.aiplatform.v1beta1.Type at Schema.properties[people].items.properties[name].anyOf[0].type.....

In [None]:




examples = [

    (
        "The ocean is very deep. In some location, it is more than 200,000 feet deep.", # example input text
        Data(persons=[]) # represent the final output from the model.
    ),
    (
        "Mountain everest is the highest mountain in the world. It is located between China and India.",
        Data(persons=[])
    ),
    (
        "In our group, Jason with black hair plays best, most because he is 7 feet tall.",
        Data(persons=[Person(name="jason", hair_color="black", height_in_meters="2.13")])
    ),
    (
        "In our group, Jason with black hair plays best, most because he is 7-feet tall. Jerry, Jason's brother, plays ok.",
        Data(persons=[Person(name="jason", hair_color="black", height_in_meters="2.13"), 
                      Person(name="jerry", hair_color=None, height_in_meters=None)])
    )
]

# The issue here is that how we can provide our example structured output to LLM so that it can use as few shot examples. 
# What we will do is use a utility to convert it too an example AIMessage. The structured output will be tool_calls args.

from langchain_core.utils.function_calling import tool_example_to_messages

messages =[]
for input, tool_args in examples:
    if len(tool_args.persons) >0:
        content="person found."
    else:
        content = "no person found."
    messages.extend(tool_example_to_messages(input, [tool_args], ai_response=content))




In [None]:
from langchain_core.messages import SystemMessage, HumanMessage

llm_with_example = ChatVertexAI(model="gemini-2-flash-001", temprature=0).with_structured_output(schema=Data)
sys_msg = SystemMessage(
    """you are going to extrat useful information  about people mentione from the user text.

    Only extract relevant information from the text. 
            If you do not know the value of an attribute asked to extract, 
            return null for the attribute's value.
            
    here are some examples
    """
)

llm_with_example.invoke([sys_msg]+messages + [HumanMessage("Silver Galaxy system is vast, more than 10000 light year in radius.")], version="v2")




In [51]:
llm_with_example.invoke(HumanMessage("In our group, Jason with black hair plays best"))

ValueError: Invalid input type <class 'langchain_core.messages.human.HumanMessage'>. Must be a PromptValue, str, or list of BaseMessages.