In [1]:
%pip install -q kor markdownify requests pydantic pydantic[email] openai colorama bs4 rich

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.4/227.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m807.5/807.5 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.8/77.8 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.9/256.9 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

Python provides a mechanism to attach metadata to any type.

In [12]:
from pydantic import BaseModel, ConfigDict, Field, ValidationError
from datetime import datetime, timezone
from typing import List,Dict,Set,Tuple, Union, Annotated, get_args, Any, TypeVar

# Annotated wraps a base type (int) and carries arbitrary metadata objects with it.
SpecialInt = Annotated[
    int,
    "metadata 1",
    [1, 2, 3],
    100,
]

# Display the base type followed by each metadata item, in declaration order.
get_args(SpecialInt)

(int, 'metadata 1', [1, 2, 3], 100)

Pydantic can make use of these type annotations.

They need to be annotations that Pydantic can understand (i.e. we have to use specific objects in our annotations). This provides a very flexible way to add functionality to our types, such as constraints, validators, and much more.

Let's start with a simple example.

In [4]:
# Every field repeats the same Field(...) constraints: greater than 0, at most 100.
class Model(BaseModel):
    x: int = Field(gt=0, le=100)
    y: int = Field(gt=0, le=100)
    z: int = Field(gt=0, le=100)

# Inspect the per-field metadata recorded on the model class.
Model.model_fields

{'x': FieldInfo(annotation=int, required=True, metadata=[Gt(gt=0), Le(le=100)]),
 'y': FieldInfo(annotation=int, required=True, metadata=[Gt(gt=0), Le(le=100)]),
 'z': FieldInfo(annotation=int, required=True, metadata=[Gt(gt=0), Le(le=100)])}

In [5]:
# Reusable alias: the Field constraints are attached to the int type via Annotated.
BoundedInt = Annotated[int, Field(gt=0, le=100)]

class Model(BaseModel):
    # All three fields share the constraints carried by BoundedInt.
    x: BoundedInt
    y: BoundedInt
    z: BoundedInt

# Inspect the per-field metadata recorded on the model class.
Model.model_fields

{'x': FieldInfo(annotation=int, required=True, metadata=[Gt(gt=0), Le(le=100)]),
 'y': FieldInfo(annotation=int, required=True, metadata=[Gt(gt=0), Le(le=100)]),
 'z': FieldInfo(annotation=int, required=True, metadata=[Gt(gt=0), Le(le=100)])}

In [6]:
# x=0 violates gt=0 and z=103 violates le=100, so construction is expected to fail.
try:
    Model(x=0, y=10, z=103)
except ValidationError as ex:
    # Catch Pydantic's own exception type, consistent with the other
    # validation-failure cells in this notebook.
    print(ex)

2 validation errors for Model
x
  Input should be greater than 0 [type=greater_than, input_value=0, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/greater_than
z
  Input should be less than or equal to 100 [type=less_than_equal, input_value=103, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/less_than_equal


In [7]:
class Model(BaseModel):
    # Constrained int with a plain default value.
    field_1: Annotated[int, Field(gt=0)] = 1
    # Length constraints are attached to the str member of the union; None is the default.
    field_2: Annotated[str, Field(min_length=1, max_length=10)] | None = None

# No arguments supplied, so both fields take their defaults.
Model()

Model(field_1=1, field_2=None)

## Annotated Types and Type Variables

In [8]:
class Model(BaseModel):
    # On a list field, max_length limits the number of items.
    elements: list[int] = Field(default=[], max_length=10)

# Twenty items exceed the 10-item limit, so validation is expected to fail.
try:
    Model(elements=[1] * 20)
except ValidationError as err:
    print(err)

1 validation error for Model
elements
  List should have at most 10 items after validation, not 20 [type=too_long, input_value=[1, 1, 1, 1, 1, 1, 1, 1, ... 1, 1, 1, 1, 1, 1, 1, 1], input_type=list]
    For further information visit https://errors.pydantic.dev/2.6/v/too_long


In [9]:
# Alias for a list of ints carrying a max_length=10 constraint.
BoundedListInt = Annotated[list[int], Field(max_length=10)]

class Model(BaseModel):
    # Both fields reuse the same annotated type and default to an empty list.
    field_1: BoundedListInt = []
    field_2: BoundedListInt = []

As you can see, the only thing that changes is the type in list[]: from int, to float, to str. And if we wanted other types, we'd have to keep creating these types that are otherwise identical.

We could use the Any type in our annotation - but in doing so we would lose the type validation Pydantic has to offer (since it would now accept lists that contain any type, including lists with mixed types) - which is probably not something we want.

In [11]:
# list[Any] keeps the length limit but places no constraint on the element type.
BoundedList = Annotated[list[Any], Field(max_length=10)]



In [13]:
# Type variable standing in for the list's element type.
T = TypeVar('T')

# Generic alias parameterized by T, carrying a max_length=10 constraint.
BoundedList = Annotated[list[T], Field(max_length=10)]

## String Constraints

In [14]:
from pydantic import StringConstraints

In [15]:
class Model(BaseModel):
    # String length constrained through Field: between 2 and 5 characters.
    name: str = Field(min_length=2, max_length=5)

In [16]:
# Reusable string type: StringConstraints bundles lower-casing, a minimum
# length of 2, and whitespace stripping into a single annotation.
StandardString = Annotated[
    str,
    StringConstraints(to_lower=True, min_length=2, strip_whitespace=True)
]

In [17]:
class Model(BaseModel):
    # Optional field: either a StandardString or None (the default).
    code: StandardString | None = None

In [18]:
# No value supplied for code, so it falls back to its None default.
Model()

Model(code=None)

## Project

In [19]:
from datetime import date
from enum import Enum
from uuid import uuid4
from pydantic import BaseModel, ConfigDict, Field, field_serializer
from pydantic.alias_generators import to_camel
from pydantic import UUID4


class AutomobileType(Enum):
    """Automobile body types; member values are strings such as "Sedan" and "SUV"."""

    sedan = "Sedan"
    coupe = "Coupe"
    convertible = "Convertible"
    suv = "SUV"
    truck = "Truck"


# Starting version of the project model; string fields are plain `str` here.
class Automobile(BaseModel):
    # Shared configuration: extra keys are forbidden, strings are stripped,
    # defaults and assignments are validated, and aliases are generated in
    # camelCase (e.g. series_name -> seriesName).
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
        alias_generator=to_camel,
    )

    # Explicit alias "id"; a fresh uuid4 is generated when no value is supplied.
    id_: UUID4 | None = Field(alias="id", default_factory=uuid4)
    manufacturer: str
    series_name: str
    # Explicit alias "type" (trailing underscore avoids shadowing the builtin).
    type_: AutomobileType = Field(alias="type")
    is_electric: bool = False
    # Read from "completionDate" on input; must be on or after 1980-01-01.
    manufactured_date: date = Field(validation_alias="completionDate", ge=date(1980, 1, 1))
    # Read from "msrpUSD" on input, written as "baseMSRPUSD" on output.
    base_msrp_usd: float = Field(
        validation_alias="msrpUSD",
        serialization_alias="baseMSRPUSD"
    )
    vin: str
    # Read from "doors" on input; between 2 and 4 and a multiple of 2, default 4.
    number_of_doors: int = Field(
        default=4,
        validation_alias="doors",
        ge=2,
        le=4,
        multiple_of=2,
    )
    registration_country: str | None = None
    license_plate: str | None = None

    @field_serializer("manufactured_date", when_used="json-unless-none")
    def serialize_date(self, value: date) -> str:
        """Format the manufactured date as YYYY/MM/DD.

        Registered with when_used="json-unless-none".
        """
        return value.strftime("%Y/%m/%d")

In [20]:
from uuid import UUID

# Sample input payload for the project: camelCase keys and an ISO-format date string.
data = {
    # Identity and description
    "id": "c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7",
    "manufacturer": "BMW",
    "seriesName": "M4 Competition xDrive",
    "type": "Convertible",
    "isElectric": False,
    # Build date and pricing
    "completionDate": "2023-01-01",
    "msrpUSD": 93_300,
    "topFeatures": [
        "6 cylinders",
        "all-wheel drive",
        "convertible",
    ],
    # Vehicle identification and registration
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB",
}

# Expected by-alias serialization of the payload, holding Python objects
# (UUID, enum member, date) rather than their string forms.
expected_serialized_by_alias = {
    "id": UUID("c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7"),
    "manufacturer": "BMW",
    "seriesName": "M4 Competition xDrive",
    "type": AutomobileType.convertible,
    "isElectric": False,
    "manufacturedDate": date(2023, 1, 1),
    "baseMSRPUSD": 93300.0,
    "topFeatures": [
        "6 cylinders",
        "all-wheel drive",
        "convertible",
    ],
    "vin": "1234567890",
    "numberOfDoors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB",
}

## Solution

In [21]:
from typing import Annotated, TypeVar
from pydantic import Field

# String type constrained to between 2 and 50 characters.
BoundedString = Annotated[str, Field(min_length=2, max_length=50)]

# Type variable standing in for the list's element type.
T = TypeVar('T')

# Generic list type constrained to between 1 and 5 items.
BoundedList = Annotated[list[T], Field(min_length=1, max_length=5)]

In [22]:
# Small throwaway model used to exercise BoundedString on its own.
class Test(BaseModel):
    field1: BoundedString

# "abc" is within the 2-50 character bounds, so this validates.
Test(field1="abc")

Test(field1='abc')

In [23]:
# A single character is below the 2-character minimum of BoundedString.
try:
    Test(field1="a")
except ValidationError as err:
    print(err)

1 validation error for Test
field1
  String should have at least 2 characters [type=string_too_short, input_value='a', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/string_too_short


In [24]:
# 51 characters is one more than the 50-character maximum of BoundedString.
try:
    Test(field1="a" * 51)
except ValidationError as err:
    print(err)

1 validation error for Test
field1
  String should have at most 50 characters [type=string_too_long, input_value='aaaaaaaaaaaaaaaaaaaaaaaa...aaaaaaaaaaaaaaaaaaaaaaa', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/string_too_long


In [25]:
# Solution version of the project model: string fields use BoundedString and
# top_features uses the generic BoundedList.
class Automobile(BaseModel):
    # Shared configuration: extra keys are forbidden, strings are stripped,
    # defaults and assignments are validated, and aliases are generated in
    # camelCase (e.g. series_name -> seriesName).
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
        alias_generator=to_camel,
    )

    # Explicit alias "id"; a fresh uuid4 is generated when no value is supplied.
    id_: UUID4 | None = Field(alias="id", default_factory=uuid4)
    manufacturer: BoundedString
    series_name: BoundedString
    # Explicit alias "type" (trailing underscore avoids shadowing the builtin).
    type_: AutomobileType = Field(alias="type")
    is_electric: bool = False
    # Read from "completionDate" on input; must be on or after 1980-01-01.
    manufactured_date: date = Field(validation_alias="completionDate", ge=date(1980, 1, 1))
    # Read from "msrpUSD" on input, written as "baseMSRPUSD" on output.
    base_msrp_usd: float = Field(
        validation_alias="msrpUSD",
        serialization_alias="baseMSRPUSD"
    )
    # Optional list of bounded strings; the list itself is bounded by BoundedList.
    top_features: BoundedList[BoundedString] | None = None
    vin: BoundedString
    # Read from "doors" on input; between 2 and 4 and a multiple of 2, default 4.
    number_of_doors: int = Field(
        default=4,
        validation_alias="doors",
        ge=2,
        le=4,
        multiple_of=2,
    )
    registration_country: BoundedString | None = None
    license_plate: BoundedString | None = None

    @field_serializer("manufactured_date", when_used="json-unless-none")
    def serialize_date(self, value: date) -> str:
        """Format the manufactured date as YYYY/MM/DD.

        Registered with when_used="json-unless-none".
        """
        return value.strftime("%Y/%m/%d")

In [26]:
# Validate the raw payload dict into an Automobile instance.
car = Automobile.model_validate(data)
# Display the validated model.
car

Automobile(id_=UUID('c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7'), manufacturer='BMW', series_name='M4 Competition xDrive', type_=<AutomobileType.convertible: 'Convertible'>, is_electric=False, manufactured_date=datetime.date(2023, 1, 1), base_msrp_usd=93300.0, top_features=['6 cylinders', 'all-wheel drive', 'convertible'], vin='1234567890', number_of_doors=2, registration_country='France', license_plate='AAA-BBB')