In [94]:
from datetime import datetime
from typing import Any, Dict, List

from pydantic import BaseModel, Field, ValidationError, ValidationInfo, field_validator, model_validator
from rich.pretty import pprint
from typing_extensions import Self

In [42]:
class User(BaseModel):
    id: int = Field(..., description="The user id", examples=[1, 2, 3])
    name: str = Field(..., min_length=2, max_length=50)
    email: str = Field(..., description="Email of the user")
    birth_date: datetime
    is_active: bool = True

In [44]:
pprint(User.model_json_schema())
pprint(User.model_fields)

In [54]:
class Users(BaseModel):
    random_attribute: Dict[str, List[int]] = Field(..., description="A random attribute.")
    users: list[User]

In [53]:
pprint(Users.model_json_schema())
pprint(Users.model_fields)

Below is a showcase of how pydantic coerce, parse and validate user inputs.

In [None]:
try:
    user = User(
        id="123",                           # String input but coerced to int
        name="Alice",                       # String input with correct length
        email="alice@example.com",          # String input
        birth_date="1990-01-01T00:00:00",   # String input but parsed to datetime
        is_active="yes"                     # String input but coerced to bool
    )
    pprint(user)

    user_all_input_types_correct = User(
        id=123,
        name="Alice",
        email="alice@example.com",
        birth_date=datetime(1990, 1, 1),
        is_active=True
    )
    pprint(user_all_input_types_correct)
    assert user == user_all_input_types_correct
except ValidationError as exc:
    print("Validation error:\n")
    pprint(exc)

Below is a failed case where the parsing and validation fails, so it shows you
that actual type checking and data validation is taking place.

In [55]:
try:
    user = User(
        id="abc",  # Can't be parsed to int
        name=[1, 2, 3],
        email="not_an_email",
        birth_date="invalid_date",
        is_active=None
    )
    pprint(user)
except ValidationError as exc:
    print("Validation error:\n")
    pprint(exc)

Validation error:



## Field Validators

### Before

In [76]:
class ComplexUser(BaseModel):
    id: int
    name: str
    code: str
    status: str


Consider that your company the `id` all starts with the prefix `ID-` with _unique_
integers following it. Then the internal parser cannot coerce the string
`ID-12345` into an integer. Since the integers following `ID-` is unique,
we can just add a `field_validator` to extract the integer part and validate
it. We would want to use a `before` field validator because we want our
custom validation/parsing/coerce logic to happen _before_ the default
pydantic parsing logic.

In [77]:
try:
    model = ComplexUser(id="ID-12345", name="Prefixed ID", code="CODE_456", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)

To add the `before` validator, we can use the `field_validator` decorator.

In [78]:
class ComplexUser(BaseModel):
    id: int
    name: str
    code: str
    status: str


    @field_validator('id', mode='before')
    @classmethod
    def preprocess_id(cls: Self, v: Any) -> int:
        if isinstance(v, str) and v.startswith('ID-'):
                print(f"Preprocessing ID: {v}")
                return int(v[3:])
        return v


In [80]:
try:
    model = ComplexUser(id="ID-12345", name="John Doe", code="CODE_456", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)

Preprocessing ID: ID-12345


So we see that when the default pydantic parsing may fail, we can add
`before` field validators to handle the parsing and validation of the raw
input data first, before the default pydantic parsing logic takes over.

### After

In a similar vein, we can also add `after` field validators to handle the
parsed data after the default pydantic parsing logic has taken place.
The `after` field validator is useful for post-processing or additional
validation on parsed data. Due to the nature of the `after` field validator,
the parsed data is guaranteed to be of the correct type and is up to you
to post-process it.

Consider the case where you want to capitalize the `name` field after it
has been parsed. We will use `.title()` because we want to capitalize the
first letter of each word in the string and not just the first letter of
the entire string.

In [87]:
class ComplexUser(BaseModel):
    id: int
    name: str
    code: str
    status: str

    @field_validator('id', mode='before')
    @classmethod
    def preprocess_id(cls: Self, v: Any) -> int:
        if isinstance(v, str) and v.startswith('ID-'):
                print(f"Preprocessing ID: {v}")
                return int(v[3:])
        return v

    @field_validator('name', mode="after")
    @classmethod
    def capitalize_name(cls: Self, v: str) -> str:
        print(f"Capitalizing name: {v}")
        return v.title()

try:
    model = ComplexUser(id="ID-12345", name="john doe", code="CODE_456", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)

Preprocessing ID: ID-12345
Capitalizing name: john doe


We see that when user input a string that is all low caps `john doe`, the
`after` field validator will capitalize the first letter of each word in
the string.

However, since it happens after the validation internally, we can actually do
naughty things like changing the value of the field to something else. For example,
no one is stopping me from just returning a list of integers in the `after` field
`capitalize_name` validator.

In [88]:
class ComplexUser(BaseModel):
    id: int
    name: str
    code: str
    status: str

    @field_validator('id', mode='before')
    @classmethod
    def preprocess_id(cls: Self, v: Any) -> int:
        if isinstance(v, str) and v.startswith('ID-'):
                print(f"Preprocessing ID: {v}")
                return int(v[3:])
        return v

    @field_validator('name', mode="after")
    @classmethod
    def capitalize_name(cls: Self, v: str) -> str:
        print(f"Capitalizing name: {v}")
        return [1,2,3]

try:
    model = ComplexUser(id="ID-12345", name="john doe", code="CODE_456", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)

Preprocessing ID: ID-12345
Capitalizing name: john doe


And the code still runs without any errors. So be careful when using `after` field
validators as it can be used to change the value of the field to something else.

### Plain

Completely replaces Pydantic's internal validation and is responsible for all type checking and validation.
No other validators are called after this and this is useful when you need full control over validation logic.

In [93]:
class ComplexUser(BaseModel):
    id: int
    name: str
    code: str
    status: str

    @field_validator('id', mode='before')
    @classmethod
    def preprocess_id(cls: Self, v: Any) -> int:
        if isinstance(v, str) and v.startswith('ID-'):
                print(f"Preprocessing ID: {v}")
                return int(v[3:])
        return v

    @field_validator('name', mode="after")
    @classmethod
    def capitalize_name(cls: Self, v: str) -> str:
        print(f"Capitalizing name: {v}")
        return [1,2,3]

    @field_validator('code', mode='plain')
    @classmethod
    def validate_code(cls: Self, v: Any) -> str:
        if not isinstance(v, str) or not v.startswith('CODE_'):
            raise ValueError("Code must be a string starting with 'CODE_'")
        return v

try:
    model = ComplexUser(id="ID-12345", name="john doe", code="AAA", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)


try:
    model = ComplexUser(id="ID-12345", name="john doe", code="CODE_AAA", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)

Preprocessing ID: ID-12345
Capitalizing name: john doe


Preprocessing ID: ID-12345
Capitalizing name: john doe


### Wrap

See [discussion here](https://stackoverflow.com/questions/77007885/pydantic-v2-model-validatormode-wrap-how-to-use-modelwrapvalidatorhandl)
to get a glimpse of how to use `wrap` validator.

- Can run code before and after Pydantic's internal validation
- Receives a handler function to call the inner validator
- Can modify input before validation and output after validation
- Can catch and handle validation errors from inner validators

In [97]:
class ComplexUser(BaseModel):
    id: int
    name: str
    code: str
    status: str

    @field_validator('id', mode='before')
    @classmethod
    def preprocess_id(cls: Self, v: Any) -> int:
        if isinstance(v, str) and v.startswith('ID-'):
                print(f"Preprocessing ID: {v}")
                return int(v[3:])
        return v

    @field_validator('name', mode="after")
    @classmethod
    def capitalize_name(cls: Self, v: str) -> str:
        print(f"Capitalizing name: {v}")
        return [1,2,3]

    @field_validator('code', mode='plain')
    @classmethod
    def validate_code(cls: Self, v: Any) -> str:
        if not isinstance(v, str) or not v.startswith('CODE_'):
            raise ValueError("Code must be a string starting with 'CODE_'")
        return v

    @field_validator('status', mode='wrap')
    @classmethod
    def validate_status(cls, value: Any, handler: Any, info: ValidationInfo) -> str:
        # pre-processing
        if isinstance(value, str):
            value = value.upper()

        # inner validator
        try:
            validated = handler(value)
            pprint(validated)
        except ValueError as exc:
            raise ValueError(f"Invalid status: {exc}") from exc

        # post-processing
        if validated not in ['ACTIVE', 'INACTIVE']:
            raise ValueError("Status must be either 'ACTIVE' or 'INACTIVE'")

        return validated

In [98]:
try:
    model = ComplexUser(id="ID-12345", name="john doe", code="CODE_AAA", status="inactive")
    pprint(model)
except ValidationError as exc:
    pprint(exc)

Preprocessing ID: ID-12345
Capitalizing name: john doe


In [6]:
from pydantic import BaseModel, create_model
from typing import Type

def generate_dynamic_model(schema: dict) -> Type[BaseModel]:
    fields = {}
    for field_name, field_info in schema.items():
        field_type = field_info['type']
        # Check if 'default' is in the dictionary keys, use it or set to Ellipsis for required fields
        default_value = field_info["default"] if "default" in field_info else ...
        fields[field_name] = (field_type, default_value)
    return create_model('DynamicModel', **fields)

# Example Schema
schema = {
    'name': {'type': str},  # No default, required
    'age': {'type': int, 'default': None},  # Optional with default None
    'is_active': {'type': bool, 'default': True}  # Optional with default True
}

# Create and test the dynamic model
DynamicUser = generate_dynamic_model(schema)
user = DynamicUser(name='Alice', age=25)
pprint(user)


In [25]:
from pydantic import BaseModel, create_model, ValidationError
from typing import Type, Any, Dict, List

def generate_dynamic_model(schema: Dict[str, Dict[str, Any]]) -> Type[BaseModel]:
    """
    Dynamically creates a Pydantic model based on a provided schema dictionary.
    This version supports nested Pydantic models.

    :param schema: A dictionary where keys are field names and values are dictionaries
                   containing 'type' (which may be another Pydantic model) and optionally 'default'.
    :return: A dynamically created Pydantic model class.
    """
    fields = {}
    for field_name, field_info in schema.items():
        field_type = field_info['type']
        # Determine if a default value is provided or if the field is required
        if 'default' in field_info:
            default_value = field_info['default']
        else:
            default_value = ...
        fields[field_name] = (field_type, default_value)
        pprint(fields)

    # Create the model class dynamically
    return create_model('DynamicModel', **fields)

# Example of defining nested Pydantic models and using them in a dynamic schema
class Address(BaseModel):
    city: str
    postal_code: int

# Schema including a nested Pydantic model
schema = {
    'name': {'type': str},
    'age': {'type': int, 'default': None},  # Optional with default None
    'address': {'type': List[Address]}  # Nested Pydantic model, required
}

# Create and test the dynamic model
DynamicUser = generate_dynamic_model(schema)
try:
    user = DynamicUser(name='Alice', address=[{'city': 'New York', 'postal_code': 10001}])
    pprint(user)
except ValidationError as e:
    print(f"Validation Error: {e}")


In [13]:
DynamicUser.model_fields

{'name': FieldInfo(annotation=str, required=True),
 'age': FieldInfo(annotation=int, required=False),
 'address': FieldInfo(annotation=List[__main__.Address], required=True)}

In [16]:
schema = {
    'name': {'type': List[str]},
    'age': {'type': int, 'default': None},  # Optional with default None
    'address': {'type': Dict[str, str]}  # Nested Pydantic model, required
}

# Create and test the dynamic model
DynamicUser = generate_dynamic_model(schema)

In [17]:
DynamicUser.model_fields

{'name': FieldInfo(annotation=List[str], required=True),
 'age': FieldInfo(annotation=int, required=False),
 'address': FieldInfo(annotation=Dict[str, str], required=True)}

In [24]:
from pydantic import BaseModel, create_model, ValidationError, Field
from typing import Type, Any, Dict, List, Union, Optional, Tuple
import inspect
from pydantic.fields import FieldInfo

def generate_dynamic_model(schema: Dict[str, Dict[str, Any]], model_name: str = "DynamicModel") -> Type[BaseModel]:
    """
    Dynamically creates a Pydantic model based on a provided schema dictionary.
    This version supports nested Pydantic models, lists, optional fields, and field metadata.

    :param schema: A dictionary where keys are field names and values are dictionaries
                   containing 'type' and optionally 'default', 'optional', 'description', and other metadata.
    :param model_name: Name for the dynamically created model.
    :return: A dynamically created Pydantic model class.
    """
    fields: Dict[str, Tuple[Any, FieldInfo]] = {}
    for field_name, field_info in schema.items():
        field_type = field_info['type']

        # Handle nested Pydantic models
        if isinstance(field_type, dict):
            field_type = generate_dynamic_model(field_type, f"{model_name}_{field_name}")

        # Handle lists of Pydantic models or basic types
        elif isinstance(field_type, list) and len(field_type) == 1:
            if isinstance(field_type[0], dict):
                nested_type = generate_dynamic_model(field_type[0], f"{model_name}_{field_name}_Item")
                field_type = List[nested_type]
            else:
                field_type = List[field_type[0]]

        if field_info.get('optional', False):
            field_type = Optional[field_type]

        # Determine if a default value is provided
        default_value = field_info.get('default', ...)

        # Extract field metadata
        field_metadata = {k: v for k, v in field_info.items() if k not in ['type', 'default', 'optional']}

        # Create the field with metadata
        fields[field_name] = (field_type, Field(default=default_value, **field_metadata))

    pprint(fields)
    # Create the model class dynamically
    return create_model(model_name, **fields)

def print_model_structure(model: Type[BaseModel], indent: int = 0):
    """
    Recursively prints the structure of a Pydantic model, including field metadata.
    """
    for name, field in model.model_fields.items():
        print(name, field)
        # print("  " * indent + f"{name}: {field.outer_type_}")
        # if field.field_info.description:
        #     print("  " * (indent + 1) + f"Description: {field.field_info.description}")
        # if inspect.isclass(field.type_) and issubclass(field.type_, BaseModel):
        #     print_model_structure(field.type_, indent + 1)


schema = {
    'name': {'type': str, 'description': "The user's full name"},
    'age': {'type': int, 'optional': True, 'default': None, 'description': "The user's age in years"},
    'address': {
        'type': {
            'street': {'type': str, 'description': "Street name and number"},
            'city': {'type': str, 'description': "City name"},
            'postal_code': {'type': int, 'description': "Postal code or ZIP code"}
        },
        'description': "The user's mailing address"
    },
    'phone_numbers': {'type': [str], 'description': "List of user's phone numbers"},
    'tags': {
        'type': [{
            'name': {'type': str, 'description': "Tag name"},
            'value': {'type': int, 'description': "Tag value"}
        }],
        'description': "List of user tags"
    }
}

DynamicUser = generate_dynamic_model(schema, "User")
print("Model structure:")
print_model_structure(DynamicUser)


try:
    user = DynamicUser(
        name='Alice Johnson',
        address={'street': '123 Main St', 'city': 'New York', 'postal_code': 10001},
        phone_numbers=['123-456-7890', '098-765-4321'],
        tags=[{'name': 'tag1', 'value': 1}, {'name': 'tag2', 'value': 2}]
    )
    print("\nValid user instance:")
except ValidationError as e:
    print(f"Validation Error: {e}")

# Print model schema
print("\nModel JSON Schema:")
pprint(user)

Model structure:
name annotation=str required=True description="The user's full name"
age annotation=Union[int, NoneType] required=False description="The user's age in years"
address annotation=User_address required=True description="The user's mailing address"
phone_numbers annotation=List[str] required=True description="List of user's phone numbers"
tags annotation=List[__main__.User_tags_Item] required=True description='List of user tags'

Valid user instance:

Model JSON Schema:


In [23]:
BaseModel.model_fields

{}

In [22]:
pprint(user.model_fields)

In [26]:
from pydantic import BaseModel, create_model, Field
from typing import Type, Any, Dict, List, Optional

def generate_dynamic_model(schema: Dict[str, Dict[str, Any]], model_name: str = "DynamicModel") -> Type[BaseModel]:
    fields = {}
    for field_name, field_info in schema.items():
        field_type = field_info['type']

        # Handle nested structures
        if isinstance(field_type, dict):
            field_type = generate_dynamic_model(field_type, f"{model_name}_{field_name}")
        elif isinstance(field_type, list) and isinstance(field_type[0], dict):
            nested_type = generate_dynamic_model(field_type[0], f"{model_name}_{field_name}_Item")
            field_type = List[nested_type]

        # Handle optional fields
        if field_info.get('optional', False):
            field_type = Optional[field_type]

        # Create field with metadata
        field = Field(
            default=field_info.get('default', ...),
            description=field_info.get('description'),
        )

        fields[field_name] = (field_type, field)

    return create_model(model_name, **fields)

# Use the same schema as before
schema = {
    'name': {'type': str, 'description': "The user's full name"},
    'age': {'type': int, 'optional': True, 'default': None, 'description': "The user's age in years"},
    'address': {
        'type': {
            'street': {'type': str, 'description': "Street name and number"},
            'city': {'type': str, 'description': "City name"},
            'postal_code': {'type': int, 'description': "Postal code or ZIP code"}
        },
        'description': "The user's mailing address"
    },
    'phone_numbers': {'type': List[str], 'description': "List of user's phone numbers"},
    'tags': {
        'type': [{
            'name': {'type': str, 'description': "Tag name"},
            'value': {'type': int, 'description': "Tag value"}
        }],
        'description': "List of user tags"
    }
}

DynamicUser = generate_dynamic_model(schema, "User")

# You can now use DynamicUser as before

In [27]:
pprint(DynamicUser.model_fields)

In [30]:
from pydantic import BaseModel, create_model, Field
from typing import Type, Any, Dict, List, Optional, get_origin, get_args


def generate_dynamic_model(schema: Dict[str, Dict[str, Any]], model_name: str = "DynamicModel") -> Type[BaseModel]:
    fields = {}
    for field_name, field_info in schema.items():
        field_type = field_info["type"]

        # If field_type is a string, try to evaluate it
        if isinstance(field_type, str):
            try:
                field_type = eval(field_type)
                pprint(field_type)
            except NameError:
                # If evaluation fails, keep it as a string
                pass

        # Handle List types
        if get_origin(field_type) is List:
            field_type = List[get_args(field_type)[0]]

        # Handle optional fields
        if field_info.get("optional", False):
            field_type = Optional[field_type]

        # Create field with metadata
        field = Field(
            default=field_info.get("default", ...),
            description=field_info.get("description"),
        )

        fields[field_name] = (field_type, field)

    return create_model(model_name, **fields)


# Define Address model
class Address(BaseModel):
    street: str
    city: str
    postal_code: int


# Define Tag model
class Tag(BaseModel):
    name: str
    value: int


# Use the schema with predefined models
schema = {
    "name": {"type": str, "description": "The user's full name"},
    "age": {"type": int, "optional": True, "default": None, "description": "The user's age in years"},
    "address": {"type": "Address", "description": "The user's mailing address"},
    "phone_numbers": {"type": "List[str]", "description": "List of user's phone numbers"},
    "tags": {"type": "List[Tag]", "description": "List of user tags"},
}

DynamicUser = generate_dynamic_model(schema, "User")

# Print model structure
for name, field in DynamicUser.model_fields.items():
    print(f"{name}: {field.annotation}")

# Test the model
user = DynamicUser(
    name="Alice Johnson",
    address=Address(street="123 Main St", city="New York", postal_code=10001),
    phone_numbers=["123-456-7890", "098-765-4321"],
    tags=[Tag(name="tag1", value=1), Tag(name="tag2", value=2)],
)
pprint(user)

name: <class 'str'>
age: typing.Optional[int]
address: <class '__main__.Address'>
phone_numbers: typing.List[str]
tags: typing.List[__main__.Tag]


In [None]:
from pydantic import BaseModel, create_model, Field
from typing import Type, Any, Dict, List, Optional, get_origin, get_args, Callable
import functools


def generate_dynamic_model(schema: Dict[str, Dict[str, Any]], model_name: str = "DynamicModel") -> Type[BaseModel]:
    fields = {}
    for field_name, field_info in schema.items():
        field_type = field_info["type"]

        if isinstance(field_type, str):
            try:
                field_type = eval(field_type)
            except NameError:
                pass

        if get_origin(field_type) is List:
            field_type = List[get_args(field_type)[0]]

        if field_info.get("optional", False):
            field_type = Optional[field_type]

        field = Field(
            default=field_info.get("default", ...),
            description=field_info.get("description"),
        )

        fields[field_name] = (field_type, field)

    return create_model(model_name, **fields)


def patch_dynamic_model_generation(cls: Type, method_name: str):
    """
    Patches a class method to use dynamic model generation.

    :param cls: The class containing the method to be patched.
    :param method_name: The name of the method to be patched.
    """
    original_method = getattr(cls, method_name)

    @functools.wraps(original_method)
    def wrapped_method(self, *args, **kwargs):
        # Assume the first argument is the schema
        if args:
            schema = args[0]
        elif "schema" in kwargs:
            schema = kwargs["schema"]
        else:
            raise ValueError("Schema not provided in method arguments")

        # Generate the dynamic model
        model_name = kwargs.get("model_name", "DynamicModel")
        dynamic_model = generate_dynamic_model(schema, model_name)

        # Replace the schema argument with the generated model
        if args:
            args = (dynamic_model,) + args[1:]
        else:
            kwargs["schema"] = dynamic_model

        # Call the original method with the new arguments
        return original_method(self, *args, **kwargs)

    setattr(cls, method_name, wrapped_method)


# Example usage:
class MyClass:
    def process_schema(self, schema: Dict[str, Dict[str, Any]], model_name: str = "DynamicModel") -> Type[BaseModel]:
        # Original method implementation
        print(f"Processing schema for {model_name}")
        return create_model(model_name, **schema)


# Patch the method
patch_dynamic_model_generation(MyClass, "process_schema")

# Test the patched method
my_instance = MyClass()
schema = {
    "name": {"type": str, "description": "The user's full name"},
    "age": {"type": int, "optional": True, "default": None, "description": "The user's age in years"},
    "address": {"type": "Address", "description": "The user's mailing address"},
    "phone_numbers": {"type": "List[str]", "description": "List of user's phone numbers"},
}

result = my_instance.process_schema(schema, model_name="User")
print(result)