### Defining validator functions

In [2]:
from typing_extensions import Annotated
from pydantic import BaseModel, AfterValidator, WithJsonSchema


def name_must_contain_space(v: str) -> str:
    """Check that a name contains at least one space character.

    Args:
        v: The name to check.

    Returns:
        The name, unchanged, when it contains a space.

    Raises:
        ValueError: If the name has no space in it.
    """
    if " " in v:
        return v
    raise ValueError("Name must contain a space.")

def uppercase_name(v: str) -> str:
    """Return the given name converted to upper case."""
    uppercased = v.upper()
    return uppercased

# A string that must contain a space and is then upper-cased (the recorded
# output shows "Jason Liu" stored as 'JASON LIU'). WithJsonSchema supplies the
# schema fragment, including the description, reported for fields of this type.
FullName = Annotated[
    str,
    AfterValidator(name_must_contain_space),
    AfterValidator(uppercase_name),
    WithJsonSchema({"type": "string", "description": "The user's full name"}),
]

# Model with an integer `age` and a `name` validated through FullName.
# Documented with comments rather than a docstring: pydantic would emit a class
# docstring as the model's JSON-schema description, changing the schema output.
class UserDetail(BaseModel):
    age: int
    name: FullName

In [3]:
# Valid input: the name contains a space, and the echoed model shows it upper-cased.
UserDetail(age=30, name="Jason Liu")

UserDetail(age=30, name='JASON LIU')

In [4]:
# Inspect the generated JSON schema; `name` carries the description given via WithJsonSchema.
UserDetail.model_json_schema()

{'properties': {'age': {'title': 'Age', 'type': 'integer'},
  'name': {'description': "The user's full name",
   'title': 'Name',
   'type': 'string'}},
 'required': ['age', 'name'],
 'title': 'UserDetail',
 'type': 'object'}

In [5]:
# "Jason" has no space, so validation fails and the error text is printed.
try:
    person = UserDetail.model_validate({"age": 24, "name": "Jason"})
except Exception as e:
    print(e) # this information could be used as feedback for an LLM

1 validation error for UserDetail
name
  Value error, Name must contain a space. [type=value_error, input_value='Jason', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error


### Using Field

In [6]:
from pydantic import Field


# Integer constrained with Field(gt=0); the recorded output shows -10 rejected
# with "Input should be greater than 0".
Age = Annotated[int, Field(gt=0)]

# Redefines UserDetail (shadowing the model from the earlier cell) so that
# `age` uses the constrained Age type.
class UserDetail(BaseModel):
    age: Age
    name: FullName

# Both fields are invalid here, so the printed error reports two problems.
try:
    person = UserDetail(age=-10, name="Jason")
except Exception as e:
    print(e)

2 validation errors for UserDetail
age
  Input should be greater than 0 [type=greater_than, input_value=-10, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/greater_than
name
  Value error, Name must contain a space. [type=value_error, input_value='Jason', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error


### Providing context

In [8]:
from pydantic import ValidationInfo

def message_cannot_have_blacklisted_words(v: str, info: ValidationInfo) -> str:
    """Reject a message that contains any word from the context's blacklist.

    The blacklist is read from the ``"blacklist"`` key of the validation
    context. Matching is a case-insensitive substring test.

    Args:
        v: The message being validated.
        info: Validation info; its ``context`` may carry the blacklist.

    Returns:
        The message, unchanged, when no blacklisted word is found.

    Raises:
        AssertionError: If a blacklisted word occurs in the message.
    """
    # `info.context` is None when validation runs without a context (e.g. a
    # plain `Response(message=...)`); treat that as "no blacklist" instead of
    # crashing with AttributeError on `None.get`.
    blacklist = (info.context or {}).get("blacklist", [])
    lowered = v.lower()  # loop-invariant, computed once
    for word in blacklist:
        # Lower-case the entry too, so a blacklist written as "Kill" still matches.
        # `assert` is kept so pydantic reports this as an assertion_error; note
        # that asserts are stripped when Python runs with -O.
        assert word.lower() not in lowered, f"`{word}` was found in the message `{v}`"
    return v

# String type that is checked against the context-supplied blacklist.
ModeratedStr = Annotated[str, AfterValidator(message_cannot_have_blacklisted_words)]

# Model whose single `message` field is moderated through ModeratedStr.
class Response(BaseModel):
    message: ModeratedStr


# The blacklist is handed to the validator through model_validate's `context`
# argument. "kill" occurs in the message, so the validator's assertion fails
# and the resulting error is printed.
try:
    Response.model_validate(
        {"message": "I will kill them."},
        context={
            "blacklist": {
                "rob",
                "steal",
                "kill",
                "attack",
            }
        },
    )
except Exception as e:
    print(e)

1 validation error for Response
message
  Assertion failed, `kill` was found in the message `I will kill them.` [type=assertion_error, input_value='I will kill them.', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/assertion_error


### Using OpenAI moderation

In [9]:
from typing import Annotated
from pydantic import AfterValidator
from instructor import openai_moderation

import instructor
from openai import OpenAI

# Wrap the OpenAI client with instructor.patch; this `client` is reused by the
# validators and chat-completion calls in the cells below.
client = instructor.patch(OpenAI())

# Annotated (added to `typing` in Python 3.9) attaches custom metadata to a
# type hint; here the metadata is an AfterValidator built by openai_moderation.
# Note: this rebinds ModeratedStr, replacing the blacklist-based alias above.
ModeratedStr = Annotated[str, AfterValidator(openai_moderation(client=client))]


# Redefines Response so that `message` uses the moderation-backed ModeratedStr.
class Response(BaseModel):
    message: ModeratedStr


# The recorded output shows this message being flagged for violence; the
# validation error is caught and printed.
try:
    Response(message="I want to make them suffer the consequences")
except Exception as e:
    print(e)

1 validation error for Response
message
  Value error, `I want to make them suffer the consequences` was flagged for violence [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error


### General validator

In [10]:
from instructor import llm_validator

# String type validated by instructor's llm_validator, which is given a
# natural-language rule and the patched client.
# Presumably the rule is checked by an LLM call made through `client` — confirm
# against the instructor docs.
HealthTopicStr = Annotated[
    str,
    AfterValidator(
        llm_validator(
            "don't talk about any other topic except health best practices and topics",
            openai_client=client,
        )
    ),
]


# Model whose `message` must satisfy the health-topic rule of HealthTopicStr.
class AssistantMessage(BaseModel):
    message: HealthTopicStr


# This message is about travel, not health, so the validator is expected to
# reject it. Catch and print the error (as the other demo cells do) so that
# "Run All" is not halted by an uncaught ValidationError.
try:
    AssistantMessage(
        message="I would suggest you to visit Sicily as they say it is very nice in winter."
    )
except Exception as e:
    print(e)

ValidationError: 1 validation error for AssistantMessage
message
  Assertion failed, The statement is not related to health best practices or topics. [type=assertion_error, input_value='I would suggest you to v...is very nice in winter.', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/assertion_error

### Avoid hallucination with citation

In [11]:
from pydantic import ValidationInfo

def citation_exists(v: str, info: ValidationInfo) -> str:
    """Check that a citation is an exact substring of the source text.

    The source text is read from the ``"text_chunk"`` key of the validation
    context. When there is no context, or the context has no ``"text_chunk"``,
    the check is skipped and the citation is accepted as-is.

    Args:
        v: The citation to verify.
        info: Validation info; its ``context`` may carry the source text.

    Returns:
        The citation, unchanged.

    Raises:
        ValueError: If the source text is available and does not contain the
            citation.
    """
    context = info.context
    if not context:
        return v
    text_chunk = context.get("text_chunk")
    # A context without "text_chunk" yields None here; skip the check rather
    # than letting `v not in None` raise a TypeError.
    if text_chunk is not None and v not in text_chunk:
        raise ValueError(f"Citation `{v}` not found in text, only use citations from the text.")
    return v

# A string that citation_exists checks against the context's text chunk.
# WithJsonSchema supplies the schema fragment, whose description asks for an
# exact substring of the context.
Citation = Annotated[
    str,
    AfterValidator(citation_exists),
    WithJsonSchema(
        {
            "type": "string",
            "description": "For every answer provide an exact substring match to the context",
        }
    ),
]


# Model pairing a free-form `answer` with a `citation` validated by Citation.
class AnswerWithCitation(BaseModel):
    answer: str
    citation: Citation

# The citation is not a substring of the supplied text_chunk, so validation
# fails and the error is printed.
try:
    AnswerWithCitation.model_validate(
        {
            "answer": "Jason is cool",
            "citation": "Jason is a cool person",
        },
        context={"text_chunk": "Jason is just a normal guy"},
    )
except Exception as e:
    print(e)

1 validation error for AnswerWithCitation
citation
  Value error, Citation `Jason is a cool person` not found in text, only use citations from the text. [type=value_error, input_value='Jason is a cool person', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error


### Reasking with validators

In [12]:
# Response model with two plain string fields and no custom validation.
class QuestionAnswer(BaseModel):
    question: str
    answer: str


# `question` and `context` are notebook-level variables; a later cell reuses
# them in an identical prompt.
question = "What is the meaning of life?"
context = (
    "The according to the devil the meaning of life is a life of sin and debauchery."
)


# Ask the model through the patched client, passing QuestionAnswer as
# `response_model`; the result is dumped as JSON below.
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=QuestionAnswer,
    messages=[
        {
            "role": "system",
            "content": "You are a system that answers questions based on the context. answer exactly what the question asks using the context.",
        },
        {
            "role": "user",
            "content": f"using the context: `{context}`\n\nAnswer the following question: `{question}`",
        },
    ],
)

print(resp.model_dump_json(indent=2))

{
  "question": "What is the meaning of life?",
  "answer": "According to the devil the meaning of life is a life of sin and debauchery."
}


In [16]:
from instructor import llm_validator


# String type validated by llm_validator with the rule
# "don't say objectionable things", using the patched client.
NotEvilAnswer = Annotated[
    str,
    AfterValidator(
        llm_validator("don't say objectionable things", openai_client=client)
    ),
]


# Redefines QuestionAnswer (shadowing the earlier model) so that `answer` is
# validated by NotEvilAnswer.
class QuestionAnswer(BaseModel):
    question: str
    answer: NotEvilAnswer


# Same prompt as the earlier cell, now with the NotEvilAnswer-validated model.
# max_retries=2 — presumably lets instructor re-ask the model after a
# validation failure; confirm against the instructor docs.
# NOTE(review): the recorded output shows this call ending in a
# ValidationError, in which case `resp` is not rebound by this cell.
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=QuestionAnswer,
    max_retries=2,
    messages=[
        {
            "role": "system",
            "content": "You are a system that answers questions based on the context. answer exactly what the question asks using the context.",
        },
        {
            "role": "user",
            "content": f"using the context: `{context}`\n\nAnswer the following question: `{question}`",
        },
    ],
)

ValidationError: 1 validation error for QuestionAnswer
answer
  Assertion failed, The statement promotes objectionable content by suggesting a life of sin and debauchery as the meaning of life. [type=assertion_error, input_value='According to the devil, ... of sin and debauchery.', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/assertion_error

In [17]:
# NOTE(review): the previous cell's recorded output is a ValidationError, so
# `resp` here appears to still hold the unvalidated answer from the earlier
# cell (the printed JSON is identical) — confirm and re-run on a fresh kernel.
print(resp.model_dump_json(indent=2))

{
  "question": "What is the meaning of life?",
  "answer": "According to the devil the meaning of life is a life of sin and debauchery."
}
