In [2]:
!pip install --upgrade langchain-core
!pip install dotenv

Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting python-dotenv (from dotenv)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv, dotenv

   -------------------- ------------------- 1/2 [dotenv]
   ---------------------------------------- 2/2 [dotenv]

Successfully installed dotenv-0.9.9 python-dotenv-1.1.1


In [18]:
# LangSmith 환경변수 연결
import os
import getpass
from dotenv import load_dotenv

load_dotenv()  # .env 파일에서 환경변수 로드

# .env 파일에 LANGSMITH_TRACING, LANGSMITH_API_KEY가 저장되어 있다고 가정
# 별도의 코드 없이 os.environ을 통해 접근 가능
# 예시: os.environ["LANGSMITH_API_KEY"]
os.environ["LANGSMITH_TRACING"]
os.environ["LANGSMITH_API_KEY"]

''

In [5]:
from typing import Optional

from pydantic import BaseModel, Field

class Person(BaseModel):
    """Person 정보"""
    name: Optional[str] = Field(default=None, description="Person의 이름")
    hair_color: Optional[str] = Field(default=None, description="Person의 머리색")
    height_in_meters: Optional[str] = Field(default=None, description="Person의 키")

최상의 성능을 얻으려면 스키마를 잘 문서화하고 텍스트에 추출할 정보가 없는 경우 모델이 결과를 반환하도록 강요되지 않도록 해야합니다. -> None을 허용해야한다.

In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are an expert extraction algorithm."
    "Only extract relevant information from the text."
    "If you do not know the value of an attribute asked to extract, "
    "return null for the attribute's value.",
    ),
    ("human", "{text}"),
])

In [9]:
!pip install -qU "langchain[google-genai]"

In [13]:
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

In [14]:
from langchain.chat_models import init_chat_model

llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

In [15]:
structured_llm = llm.with_structured_output(schema=Person)

In [19]:
text = "Alan Smith is 6 feet tall and has blond hair."
prompt = prompt_template.invoke({"text": text})
structured_llm.invoke(prompt)

Person(name='Alan Smith', hair_color='blond', height_in_meters='1.8288')

In [20]:
from typing import List, Optional

from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )


class Data(BaseModel):
    """Extracted data about people."""

    # Creates a model so that we can extract multiple entities.
    people: List[Person]

In [21]:
structured_llm = llm.with_structured_output(schema=Data)
text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
prompt = prompt_template.invoke({"text": text})
structured_llm.invoke(prompt)

Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='6 feet tall'), Person(name='Anna', hair_color='black', height_in_meters=None)])

In [22]:
messages = [
    {"role": "user", "content": "2 🦜 2"},
    {"role": "assistant", "content": "4"},
    {"role": "user", "content": "2 🦜 3"},
    {"role": "assistant", "content": "5"},
    {"role": "user", "content": "3 🦜 4"},
]

response = llm.invoke(messages)
print(response.content)

7


In [23]:
from langchain_core.utils.function_calling import tool_example_to_messages

examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        Data(people=[]),
    ),
    (
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", height_in_meters=None, hair_color=None)]),
    ),
]


messages = []

for txt, tool_call in examples:
    if tool_call.people:
        # This final message is optional for some providers
        ai_response = "Detected people."
    else:
        ai_response = "Detected no people."
    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))

  messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))


In [24]:
for message in messages:
    message.pretty_print()


The ocean is vast and blue. It's more than 20,000 feet deep.
Tool Calls:
  Data (e9a1856f-d927-4aa8-ae1f-a6b88534440a)
 Call ID: e9a1856f-d927-4aa8-ae1f-a6b88534440a
  Args:
    people: []

You have correctly called this tool.

Detected no people.

Fiona traveled far from France to Spain.
Tool Calls:
  Data (0c331eab-0677-48c2-bfe7-3dba6422e07c)
 Call ID: 0c331eab-0677-48c2-bfe7-3dba6422e07c
  Args:
    people: [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None}]

You have correctly called this tool.

Detected people.


In [25]:
message_no_extraction = {
    "role": "user",
    "content": "The solar system is large, but earth has only 1 moon.",
}

structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke([message_no_extraction])

Data(people=[Person(name='Earth', hair_color='blue', height_in_meters=None)])

In [26]:
structured_llm.invoke(messages + [message_no_extraction])

Data(people=[])