In [None]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama



# Schema for a single extracted person; used as the element type of
# `People.people` in this cell.
class Person(BaseModel):
    """Information about a person."""

    # Required field: `...` as the first argument to Field means "no default".
    # The `description` text is part of the schema, so keep it meaningful.
    name: str = Field(..., description="The name of the person")


# Top-level schema passed to `PydanticOutputParser` in this cell: a wrapper
# that holds every person found in a text.
class People(BaseModel):
    """Identifying information about all people in a text."""

    # Required list of Person entries; no default is declared.
    people: List[Person]

# Local Ollama chat model; temperature=0 is set for this extraction demo.
llm = ChatOllama(model="llama3.1", temperature=0)

# Parser that (1) produces the format instructions injected into the prompt
# and (2) turns the model's JSON reply into a `People` instance.
parser = PydanticOutputParser(pydantic_object=People)

# Prompt: the system message carries the parser's format instructions; the
# human message carries the user's query. The fence markers in the system
# message tell the model to answer with a fenced JSON block.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Wrap the output in ```json and ``` tags\n{format_instructions}",
        ),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

# The parser must be the last step of the chain; without it the chain returns
# the raw chat message and the `People` schema is never applied.
chain = prompt | llm | parser

# The prompt declares a `{query}` variable, so it has to be supplied on invoke
# (an empty dict fails before the model is ever called).
query = "Anna is 23 years old and she is 6 feet tall. Her friend Jeff is a year older."
chain.invoke({"query": query})

# Extraction chain: 1 person

In [33]:
# https://python.langchain.com/docs/tutorials/extraction/#the-schema

from typing import Optional
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # ^ Docstring for the entity Person.
    # This docstring is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is `Optional` with a default of None -- this allows the model
    #    to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    #    Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(default=None, description="The color of the person's hair if known")
    # Declared as a string rather than a number: the value comes back as text
    # (e.g. '1.83' in the recorded output of this cell).
    height_in_meters: Optional[str] = Field(default=None, description="Height measured in meters")


# Instructions for the extractor. Two common ways to extend this prompt:
#   - add reference examples to the template to improve extraction quality;
#   - add extra template parameters for context (for instance metadata about
#     the document the text came from).
system_instructions = (
    "You are an expert extraction algorithm. "
    "Only extract relevant information from the text. "
    "If you do not know the value of an attribute asked to extract, "
    "return null for the attribute's value."
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        # Reference examples would be slotted in here, see the how-to on
        # improving performance with examples:
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

llm = ChatOllama(model="llama3.1", temperature=0)

# Bind the Person schema to the model so the chain yields a Person instance.
structured_llm = llm.with_structured_output(schema=Person)
runnable = prompt | structured_llm

text = "Alan Smith is 6 feet tall and has blond hair."
runnable.invoke({"text": text})

Person(name='Alan Smith', hair_color='blond', height_in_meters='1.83')

# Extraction chain: multiple people

In [34]:
# https://python.langchain.com/docs/tutorials/extraction/#multiple-entities

from typing import Optional, List
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # ^ Docstring for the entity Person.
    # This docstring is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. The field is `Optional` with a default of None -- this allows the model
    #    to decline to extract it!
    # 2. The field has a `description` -- this description is used by the LLM.
    #    Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    # The attributes below are disabled in this cell, so `name` is the only
    # field in the schema:
    # hair_color: Optional[str] = Field(default=None, description="The color of the person's hair if known")
    # height_in_meters: Optional[str] = Field(default=None, description="Height measured in meters")
    # religion: Optional[str] = Field(default=None, description="the religion of the person")


class Data(BaseModel):
    """People present in a text."""

    # Wrapper model so that multiple Person entities can be extracted at once.
    # The list is required (no default). The recorded output of this cell shows
    # validation failing on this field with "value is not a valid list".
    people: List[Person]

# Instructions for the extractor. Two common ways to extend this prompt:
#   - add reference examples to the template to improve extraction quality;
#   - add extra template parameters for context (for instance metadata about
#     the document the text came from).
system_instructions = (
    "You are an expert extraction algorithm. "
    "Only extract asked information from the text. "
    # Currently disabled continuation of the instructions:
    # "If you do not know the value of an attribute asked to extract, "
    # "return null for the attribute's value.",
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        # Reference examples would be slotted in here, see the how-to on
        # improving performance with examples:
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

# NOTE(review): this cell targets port 5000 explicitly, while the other cells
# do not pass a base_url at all -- confirm the different endpoint is intended.
llm = ChatOllama(model="llama3.1", temperature=0, base_url="http://127.0.0.1:5000")

# Bind the Data schema (a list of Person) to the model.
structured_llm = llm.with_structured_output(schema=Data)
runnable = prompt | structured_llm

text = "My name is Jeff, my hair is black and i am 6 feet tall and I am christian. Anna has the same color hair as me."
response = runnable.invoke({"text": text})

response

ValidationError: 1 validation error for Data
people
  value is not a valid list (type=type_error.list)