In [48]:
!pip install -q kor markdownify requests pydantic openai colorama bs4 rich

## Field Aliases and Default Values
So far we have seen how to define field names, types and default validation behavior using just type hinting and default values.

But Pydantic has a more powerful way of attaching field configuration to fields in our models.

This is done via the Field object.

To configure fields beyond what we've seen so far, we can create an instance of this Field object, configure settings on it, and attach it to a field by using it as the default value.

This means we'll need a slightly different way to specify both a Field and a default value at the same time (we'll specify the default value in the Field object itself).

Let's take a look at how we can use the Field object to define aliases for our fields.

In [49]:
def combine_name(first_name:str,last_name:str):
  print(first_name.capitalize(),last_name.capitalize())

combine_name('jane', 'mori')

Jane Mori


In [50]:
# def process(items: list[str]):
#   for i in items:
#     print(i)

def process_items(item_t: tuple[int,int,str], items_s: set[str]):
  return item_t, items_s

process_items(item_t=(1,2,'LOFO'),items_s={1,2,3})


((1, 2, 'LOFO'), {1, 2, 3})

In [51]:
def process_items(items: dict[str, int | float]):
  print(items)

process_items({
    "id": 1,
    "age": 2332.23
})


{'id': 1, 'age': 2332.23}


# classes, typing and functions

In [52]:
class Song:
    def __init__(self, title: str) -> None:
        self.title = title

class PlayList:
    def __init__(self, song: Song) -> None:
        self.song = song

    def download_songs(self):  # Added self parameter
        print(self.song.title)  # Accessing title through self

s = Song('follow me')
playlist = PlayList(s)
playlist.download_songs()

follow me


In [53]:
from datetime import datetime, date, time
from pydantic import BaseModel, ValidationError


class SongValidation(BaseModel):
    id: int
    title: str
    artist: str
    releasedDate: date
    duration: time


external_data = {
    "id": "AD",
    "title": "NEW SONG",
    "artist": "Maria",
    "releasedDate": "2023-02-02",
    "duration": "02:04:00"
}

try:
    s = SongValidation(**external_data)
    print(s.artist)
except ValidationError as exec:
    print(exec.errors())

[{'type': 'int_parsing', 'loc': ('id',), 'msg': 'Input should be a valid integer, unable to parse string as an integer', 'input': 'AD', 'url': 'https://errors.pydantic.dev/2.6/v/int_parsing'}]


In [54]:
from pydantic import BaseModel, Field, ValidationError

class Model(BaseModel):
    id_: int = Field(alias="id")
    last_name: str = Field(alias="lastName")

json_data = """
{
    "id": 100,
    "lastName": "Gauss"
}
"""

m = Model.model_validate_json(json_data)

m

Model(id_=100, last_name='Gauss')

In [55]:
class Model(BaseModel):
    id_: int = Field(alias="id", default=100)
    last_name: str = Field(alias="lastName")

Model(lastName="Gauss")

Model(id_=100, last_name='Gauss')

In [56]:
Model(id=1, lastName="Newton")

Model(id_=1, last_name='Newton')

In [57]:
class Person(BaseModel):
    id_: int = Field(alias="id")
    first_name: str | None = Field(alias="firstName", default=None)
    last_name: str = Field(alias="lastName")
    age: int | None = None

isaac = Person(id=1, firstName="Isaac", lastName="Newton", age=84)
isaac

Person(id_=1, first_name='Isaac', last_name='Newton', age=84)

The above serialization use the field names. To use the aliases instead (or the field name if no alias is defined), we supply an argument to the dump methods:

In [58]:
isaac.model_dump(by_alias=True)

{'id': 1, 'firstName': 'Isaac', 'lastName': 'Newton', 'age': 84}

In [59]:
Person.model_fields

{'id_': FieldInfo(annotation=int, required=True, alias='id', alias_priority=2),
 'first_name': FieldInfo(annotation=Union[str, NoneType], required=False, alias='firstName', alias_priority=2),
 'last_name': FieldInfo(annotation=str, required=True, alias='lastName', alias_priority=2),
 'age': FieldInfo(annotation=Union[int, NoneType], required=False)}

## Alias Generator Functions
When working with REST APIs, using camel case for deserializing/serializing data, and snake case for field names is extremely common.

One approach is to use the alias definitions we looked at in the previous video.

However, doing this for hundreds of fields and dozens of models is tedious and error prone.

Converting from snake case to camel case is rather systematic, and we could write a function to do this. https://docs.pydantic.dev/latest/api/config/#pydantic.alias_generators

In [60]:
from pydantic.alias_generators import to_camel, to_snake, to_pascal

to_camel("last_name")

'lastName'

In [61]:
to_snake("lastName")

'last_name'

In [62]:
to_pascal("last_name")

'LastName'

The reason we have those functions is that we can configure our model to auto generate field aliases using one of those functions.

But any function that converts one string (the field name) into another (an alias) can also be used.

In [63]:
def make_upper(in_str: str) -> str:
    return in_str.upper()

make_upper("last_name")

'LAST_NAME'

Now let's attach this function to our model definition, using, of course, model_config:

In [64]:
from pydantic import BaseModel, ConfigDict, Field, ValidationError

class Person(BaseModel):
    model_config = ConfigDict(alias_generator=make_upper)

    id_: int
    first_name: str | None = None
    last_name: str
    age: int | None = None

Person.model_fields

{'id_': FieldInfo(annotation=int, required=True, alias='ID_', alias_priority=1),
 'first_name': FieldInfo(annotation=Union[str, NoneType], required=False, alias='FIRST_NAME', alias_priority=1),
 'last_name': FieldInfo(annotation=str, required=True, alias='LAST_NAME', alias_priority=1),
 'age': FieldInfo(annotation=Union[int, NoneType], required=False, alias='AGE', alias_priority=1)}

In [65]:
p = Person(ID_=1, LAST_NAME="Fourier", AGE=62)
p

Person(id_=1, first_name=None, last_name='Fourier', age=62)

In [66]:
p.model_dump()

{'id_': 1, 'first_name': None, 'last_name': 'Fourier', 'age': 62}

You'll notice how id_'s alias became ID_ - maybe we don't want that, in which case we can override it:

In [67]:
class Person(BaseModel):
    model_config = ConfigDict(alias_generator=make_upper)

    id_: int = Field(alias="ID")
    first_name: str | None = None
    last_name: str
    age: int | None = None

In [68]:
Person.model_fields

{'id_': FieldInfo(annotation=int, required=True, alias='ID', alias_priority=2),
 'first_name': FieldInfo(annotation=Union[str, NoneType], required=False, alias='FIRST_NAME', alias_priority=1),
 'last_name': FieldInfo(annotation=str, required=True, alias='LAST_NAME', alias_priority=1),
 'age': FieldInfo(annotation=Union[int, NoneType], required=False, alias='AGE', alias_priority=1)}

In [69]:
p = Person(ID=1, LAST_NAME="Fourier", AGE=62)
p

Person(id_=1, first_name=None, last_name='Fourier', age=62)

In [70]:
class Person(BaseModel):
    model_config = ConfigDict(alias_generator=to_camel)

    id_: int = Field(alias="id")
    first_name: str | None = None
    last_name: str
    age: int | None = None

Person.model_fields


{'id_': FieldInfo(annotation=int, required=True, alias='id', alias_priority=2),
 'first_name': FieldInfo(annotation=Union[str, NoneType], required=False, alias='firstName', alias_priority=1),
 'last_name': FieldInfo(annotation=str, required=True, alias='lastName', alias_priority=1),
 'age': FieldInfo(annotation=Union[int, NoneType], required=False, alias='age', alias_priority=1)}

In [71]:
p = Person(id=1, lastName="Fourier", age=62)
p

Person(id_=1, first_name=None, last_name='Fourier', age=62)

In [72]:
p.model_dump(by_alias=True)

{'id': 1, 'firstName': None, 'lastName': 'Fourier', 'age': 62}

It is customary, when the data we are deserializing contains Python reserved words, to simply define the field name as that name with an underscore (_) appended to the field name, e.g.:
```
id -> id_
list -> list_
filter -> filter_
```
We could certainly use the method we just saw, setting an alias generator, and overriding these special cases one by one:

In [73]:
class Model(BaseModel):
    model_config = ConfigDict(alias_generator=to_camel)

    id_: int = Field(alias="id")
    list_: list[str] = Field(alias="list")
    filter_: dict = Field(alias="filter")
    number_elements: list[int]

Model.model_fields

{'id_': FieldInfo(annotation=int, required=True, alias='id', alias_priority=2),
 'list_': FieldInfo(annotation=list[str], required=True, alias='list', alias_priority=2),
 'filter_': FieldInfo(annotation=dict, required=True, alias='filter', alias_priority=2),
 'number_elements': FieldInfo(annotation=list[int], required=True, alias='numberElements', alias_priority=1)}

But since we are completely consistent with how we generate these aliases (basically we convert to camel case, then strip our any trailing underscore), we could actually do this completely with a custom alias generator function:

In [74]:
def make_alias(field_name: str) -> str:
    alias = to_camel(field_name)
    return alias.removesuffix("_")

make_alias("id_")

'id'

In [75]:
make_alias("number_elements")

'numberElements'

In [76]:
class Model(BaseModel):
    model_config = ConfigDict(alias_generator=make_alias)

    id_: int
    list_: list[str]
    filter_: dict
    number_elements: list[int]

Model.model_fields

{'id_': FieldInfo(annotation=int, required=True, alias='id', alias_priority=1),
 'list_': FieldInfo(annotation=list[str], required=True, alias='list', alias_priority=1),
 'filter_': FieldInfo(annotation=dict, required=True, alias='filter', alias_priority=1),
 'number_elements': FieldInfo(annotation=list[int], required=True, alias='numberElements', alias_priority=1)}

## Deserializing by Field Name or Alias
We saw that when we deserialize data, fields that have an alias defined, must use the aliases in the data.

We can modify this behavior, to allow deserializing using either the alias or the field name.

We do this by configuring the model using model_config.

In [77]:
from pydantic import BaseModel, ConfigDict, Field, ValidationError

class Model(BaseModel):
    id_: int = Field(alias="id")
    first_name: str = Field(alias="firstName")

try:
    Model(id_=10, first_name="Newton")
except ValidationError as ex:
    print(ex)

2 validation errors for Model
id
  Field required [type=missing, input_value={'id_': 10, 'first_name': 'Newton'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing
firstName
  Field required [type=missing, input_value={'id_': 10, 'first_name': 'Newton'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing


And same if we try to deserialize a dict or JSON:

In [78]:
data = {
    "id_": 10,
    "first_name": "Newton"
}

try:
    Model.model_validate(data)
except ValidationError as ex:
    print(ex)

2 validation errors for Model
id
  Field required [type=missing, input_value={'id_': 10, 'first_name': 'Newton'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing
firstName
  Field required [type=missing, input_value={'id_': 10, 'first_name': 'Newton'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing


In [79]:
class Model(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    id_: int = Field(alias="id")
    first_name: str = Field(alias="firstName")

Model(id_=10, first_name="Newton")

Model(id_=10, first_name='Newton')

In [80]:
data = {
    "id_": 10,
    "first_name": "Newton"
}

Model.model_validate(data)

Model(id_=10, first_name='Newton')

LETS COMBINE EVERYTHING


In [81]:
from pydantic.alias_generators import to_camel

class Person(BaseModel):
    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        extra="forbid"
    )

    id_: int = Field(alias="id", default=1)
    first_name: str | None = None
    last_name: str
    age: int | None = None

In [82]:
p = Person(id=10, first_name='Isaac', lastName='Newton', age=84)
p

Person(id_=10, first_name='Isaac', last_name='Newton', age=84)

In [83]:
data_json = """
{
    "id": 10,
    "firstName": "Isaac",
    "last_name": "Newton",
    "age": 84
}
"""

p = Person.model_validate_json(data_json)
p

Person(id_=10, first_name='Isaac', last_name='Newton', age=84)

In [84]:
p.model_dump()

{'id_': 10, 'first_name': 'Isaac', 'last_name': 'Newton', 'age': 84}

In [85]:
p.model_dump(by_alias=True)

{'id': 10, 'firstName': 'Isaac', 'lastName': 'Newton', 'age': 84}

## Aliases for Serialization
On occasion, it becomes necessary to employ distinct aliases during the process of serializing and deserializing data.

Consider the scenario where we are developing an API featuring an endpoint that retrieves information from another API and subsequently forwards it to the caller.

The external API we interface with may furnish field names that occasionally diverge from the conventions upheld within our own API. Thus, we find ourselves in a situation where we must handle one field name upon data reception from the external API, while opting to serialize the data under a different identifier.

For instance, the JSON output from the external API might resemble the following:

In [86]:
response_json = """
{
    "ID": 100,
    "FirstName": "Isaac",
    "lastname": "Newton"
}
"""

class Person(BaseModel):
    id_: int = Field(alias="ID")
    first_name: str = Field(alias="FirstName")
    last_name: str = Field(alias="lastname")

p = Person.model_validate_json(response_json)
p

Person(id_=100, first_name='Isaac', last_name='Newton')

In [87]:
p.model_dump()

{'id_': 100, 'first_name': 'Isaac', 'last_name': 'Newton'}

In [88]:
p.model_dump(by_alias=True)

{'ID': 100, 'FirstName': 'Isaac', 'lastname': 'Newton'}

We can override the serialization alias, using the Field object:

In [89]:
class Person(BaseModel):
    id_: int = Field(alias="ID", serialization_alias="id")
    first_name: str = Field(alias="FirstName", serialization_alias="firstName")
    last_name: str = Field(alias="lastname", serialization_alias="lastName")

p = Person.model_validate_json(response_json)

p.model_dump(by_alias=True)

{'id': 100, 'firstName': 'Isaac', 'lastName': 'Newton'}

## Validation Aliases
So far we have seen to we can use aliases (or names) for deserializing data, and also specifying different aliases when serializing.

Although this is not common, you can also specify different validation aliases.

Essentially this allows you to specify different aliases for deserialization, and then use either aliases or serialization aliases for serialization.

It's a bit confusing, what if you have all three aliases defined - a validation alias, an alias, and a serialization alias.

Think of a plain alias as the default for either deserialization or serialization, and serialization aliases and validation aliases as overrides.

Let's take a look at a simple example with just a validation alias:

In [90]:
class Model(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    first_name: str = Field(validation_alias="FirstName")

data = {"FirstName": "Isaac"}
try:
    Model.model_validate(data)
except ValidationError as ex:
    print(ex)

In [91]:
class Model(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    first_name: str = Field(validation_alias="FirstName", alias="firstName")

In [92]:
m = Model.model_validate(data)
m

Model(first_name='Isaac')

In [93]:
m.model_dump(by_alias=True)

{'firstName': 'Isaac'}

If we add a serialization alias, we'll get a different serialization:

In [94]:
class Model(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    first_name: str = Field(
        validation_alias="FirstName",
        alias="firstName",
        serialization_alias="givenName"
    )

In [95]:
m = Model.model_validate(data)
m

Model(first_name='Isaac')

In [96]:
m.model_dump(by_alias=True)

{'givenName': 'Isaac'}

Consider this example, where we use auto generated aliases, but one field requires special aliasing for either validation or serialization:

In [98]:
from pydantic.alias_generators import to_camel

class Model(BaseModel):
    model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)

    first_name: str
    last_name: str
data = {
    "firstName": "Isaac",
    "lastName": "Newton"
}
m = Model.model_validate(data)
m.model_dump()

{'first_name': 'Isaac', 'last_name': 'Newton'}

Now, where things get interesting is that when specifying a validation alias you can actually define multiple of these - this way Pydantic will look for a field with any of the specified validation aliases, and use it to match to the field.

In [99]:
from pydantic import AliasChoices

class Model(BaseModel):
    model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)

    first_name: str = Field(
        validation_alias=AliasChoices("FirstName", "GivenName"),
        serialization_alias="givenName"
    )
    last_name: str

data = {
    "FirstName": "Isaac",
    "lastName": "Newton"
}
m = Model.model_validate(data)
m

Model(first_name='Isaac', last_name='Newton')

The most common use case I can think of, is when dealing with settings files. You may have settings that define connection strings to various resources, but each resource uses a different name for that setting.

Something like this:

In [100]:
data = {
    "databases": {
        "redis": {
            "name": "Local Redis",
            "redis_conn": "redis://secret@localhost:9000/1"
        },
        "pgsql": {
            "name": "Local Postgres",
            "pgsql_conn": "postgresql://user:secret@localhost"
        },
        "nosql": {
            "name": "Local MongoDB",
            "mongo_conn": "mongodb://USERNAME:PASSWORD@HOST/DATABASE"
        }
    }
}

In [101]:
class Database(BaseModel):
    name: str
    connection: str = Field(
        validation_alias=AliasChoices("redis_conn", "pgsql_conn", "mongo_conn")
    )

databases = {}

for key, value in data["databases"].items():
    m = Database.model_validate(value)
    databases[key] = m

databases

{'redis': Database(name='Local Redis', connection='redis://secret@localhost:9000/1'),
 'pgsql': Database(name='Local Postgres', connection='postgresql://user:secret@localhost'),
 'nosql': Database(name='Local MongoDB', connection='mongodb://USERNAME:PASSWORD@HOST/DATABASE')}

In [102]:
class Databases(BaseModel):
    databases: dict[str, Database]

In [103]:
databases = Databases.model_validate(data)
databases

Databases(databases={'redis': Database(name='Local Redis', connection='redis://secret@localhost:9000/1'), 'pgsql': Database(name='Local Postgres', connection='postgresql://user:secret@localhost'), 'nosql': Database(name='Local MongoDB', connection='mongodb://USERNAME:PASSWORD@HOST/DATABASE')})

In [104]:
print(databases.model_dump_json(indent=2))

{
  "databases": {
    "redis": {
      "name": "Local Redis",
      "connection": "redis://secret@localhost:9000/1"
    },
    "pgsql": {
      "name": "Local Postgres",
      "connection": "postgresql://user:secret@localhost"
    },
    "nosql": {
      "name": "Local MongoDB",
      "connection": "mongodb://USERNAME:PASSWORD@HOST/DATABASE"
    }
  }
}


## Custom Serializers
So far we have been happy with the way Pydantic serializes field values.

But sometimes, especially with certain data types, like datetimes, we may want to control how fields get serialized.

A typical example is to specify how a date or datetime object might get serialized.

Another example might be standardizing the number of decimal places used for floats.

Whatever your need is, you can control how field data gets serialized very easily.

We'll need to use a decorator function provided by Pydantic, called @field_serializer which is used to control serialization at the field level.

```
One important option is:

- when_used: by default the custom serializer is always used, but we have other options available:
- always: the default, serializer is executed when serializing either to a dict or to JSON
- unless-none: serializer is not used if the value is None
json: serializer is only used when serializing to JSON
- json-unless-none: serializer used when serializing to JSON, unless the value is None
```

In [105]:
from pydantic import BaseModel, field_serializer

from datetime import datetime

class Model(BaseModel):
    dt: datetime | None = None

    @field_serializer("dt", when_used="always")
    def serialize_name(self, value):
        print(f"type = {type(value)}")
        return value

In [106]:
m = Model(dt="2020-01-01T12:00:00")
m

Model(dt=datetime.datetime(2020, 1, 1, 12, 0))

In [107]:
class User(BaseModel):
    id: int
    username: str
    created_at: datetime

    @field_serializer("created_at")
    def serialize_created_at(self, dt: datetime, _info):
        return dt.strftime("%Y-%m-%d %H:%M:%S")


user = User(id=1, username="john_doe", created_at=datetime.now())
serialized_user = user.model_dump()

print(serialized_user)

{'id': 1, 'username': 'john_doe', 'created_at': '2024-02-23 10:47:59'}


In [108]:
import json
from dataclasses import asdict, dataclass
from datetime import date
from typing import Any


class DateEncoder(json.JSONEncoder):
    def default(self, obj: Any) -> Any:
        if isinstance(obj, date):
            return obj.strftime("%Y-%m-%d")
        return super().default(obj)


@dataclass
class User:
    name: str
    age: int
    birthday: date

    def __post_init__(self):
        if not isinstance(self.name, str):
            raise TypeError("name must be str")
        if len(self.name) < 4 or len(self.name) > 16:
            raise ValueError("name must be between 4 and 16 chars")
        if not isinstance(self.age, int):
            raise TypeError("age must be int")
        if self.age < 18 or self.age > 99:
            raise ValueError("age must be between 18 and 99")
        if not isinstance(self.birthday, date):
            raise TypeError("birthday must be date")

    def to_json(self) -> str:
        return json.dumps(asdict(self), cls=DateEncoder, indent=2)


if __name__ == "__main__":
    user = User(name="John", age=18, birthday=date(2000, 1, 1))
    print(user.to_json())

{
  "name": "John",
  "age": 18,
  "birthday": "2000-01-01"
}


In [109]:
# Importing date from the datetime module
from datetime import date

# Importing BaseModel and Field from Pydantic
from pydantic import BaseModel, Field

# Defining the User class
class User(BaseModel):
    # name attribute: str type with 4~16 characters
    name: str = Field(..., min_length=4, max_length=16)
    # age attribute: int type between 18~99
    age: int = Field(..., ge=18, le=99)
    # birthday attribute: date type
    birthday: date

# Main block
if __name__ == "__main__":
    # Creating an instance of the User class
    user = User(name="John", age=18, birthday=date(2000, 1, 1))
    # Printing the JSON representation of the model
    print(user.model_dump_json(indent=2))


{
  "name": "John",
  "age": 18,
  "birthday": "2000-01-01"
}


In [110]:
from datetime import date

from pydantic import BaseModel


class Task(BaseModel):
    name: str
    due_date: date


class User(BaseModel):
    name: str
    tasks: list[Task]


if __name__ == "__main__":
    user = User(
        name="John",
        tasks=[
            Task(name="task1", due_date=date(2023, 10, 26)),
            Task(name="task2", due_date=date(2023, 10, 27)),
        ],
    )
    print(user.model_dump_json(indent=2))

{
  "name": "John",
  "tasks": [
    {
      "name": "task1",
      "due_date": "2023-10-26"
    },
    {
      "name": "task2",
      "due_date": "2023-10-27"
    }
  ]
}


In [111]:
from pydantic import BaseModel, model_serializer
class Task(BaseModel):
    name: str
    due_date: date

    @model_serializer
    def serialize_date(self) -> dict[str, Any]:
        return {
            "name": self.name,
            "year": self.due_date.year,
            "month": self.due_date.month,
            "day": self.due_date.day,
        }


if __name__ == "__main__":
    task = Task(name="task1", due_date=date(2023, 10, 26))
    print(task.model_dump_json(indent=2))

{
  "name": "task1",
  "year": 2023,
  "month": 10,
  "day": 26
}


In [112]:
from datetime import datetime

from pydantic import BaseModel, field_serializer
from zoneinfo import ZoneInfo


class UserInfo(BaseModel):
    name: str
    age: int
    dt: datetime | None = None

    @field_serializer("dt",when_used="unless-none")
    def serialize_dt(self, dt: datetime) -> str:
        return dt.isoformat()


dt = datetime.now(tz=ZoneInfo("Asia/Tokyo"))

user_info = UserInfo(name="hogehoge", age=20, dt=dt)

print(user_info.model_dump())

{'name': 'hogehoge', 'age': 20, 'dt': '2024-02-23T20:11:04.308493+09:00'}


So, let's use this in our serializer, and configure the serializer to only apply to JSON serialization, and not when the value is None:

In [114]:
from datetime import datetime

class Model(BaseModel):
    dt: datetime | None = None

    @field_serializer("dt", when_used="json-unless-none")
    def serialize_name(self, value):
        print(f"type = {type(value)}")
        return value.strftime("%Y/%-m/%-d %I:%M %p")

m = Model(dt="2020-01-01T12:00:00")
m

Model(dt=datetime.datetime(2020, 1, 1, 12, 0))

In [115]:
m.model_dump_json()

type = <class 'datetime.datetime'>


'{"dt":"2020/1/1 12:00 PM"}'

Pydantic implements yet another argument that we can add to our serializer function - an argument with type FieldSerializationInfo. Let's take a look:

In [117]:
from pydantic import FieldSerializationInfo
class Model(BaseModel):
    dt: datetime | None = None

    @field_serializer("dt", when_used="unless-none")
    def dt_serializer(self, value, info: FieldSerializationInfo):
        print(f"info={info}")
        return value

m = Model(dt=datetime(2020, 1, 1))
m

Model(dt=datetime.datetime(2020, 1, 1, 0, 0))

In [118]:
m.model_dump_json()

info=SerializationInfo(include=None, exclude=None, mode='json', by_alias=False, exclude_unset=False, exclude_defaults=False, exclude_none=False, round_trip=False)


'{"dt":"2020-01-01T00:00:00"}'

In [125]:
from typing import Any, Callable
from pydantic import BaseModel
from datetime import datetime, timedelta, timezone


class ParentModel(BaseModel):
    foo: datetime

    def serialize(self, original_serializer: Callable[[BaseModel], dict[str, Any]]) -> dict[str, Any]:
        self.normalize_datetimes()
        result = original_serializer(self)
        self.convert_timedeltas(result)
        return result

    def normalize_datetimes(self):
        for field_name, field_info in self.__fields__.items():
            if field_info.type_ == datetime:
                setattr(
                    self,
                    field_name,
                    getattr(self, field_name).replace(tzinfo=timezone.utc),
                )

    def convert_timedeltas(self, result):
        for field_name, field_info in self.__fields__.items():
            if field_info.type_ == timedelta:
                result[field_name] = getattr(self, field_name).total_seconds()


class ChildModel(ParentModel):
    bar: datetime
    baz: timedelta
    qux: list[datetime]


foo = ChildModel(
    foo=datetime(2023, 3, 3, 3, 3),
    bar=datetime(2024, 4, 4, 4, 4),
    baz=timedelta(minutes=30, microseconds=10),
    qux=[datetime(2023, 5, 5, 5, 5), datetime(2023, 6, 6, 6, 6)],
)
print(foo.json())


{"foo":"2023-03-03T03:03:00","bar":"2024-04-04T04:04:00","baz":"PT1800.00001S","qux":["2023-05-05T05:05:00","2023-06-06T06:06:00"]}


We could use that, but FieldSerializationInfo offers us a method named mode_is_json that we can use instead.

In [119]:
class Model(BaseModel):
    dt: datetime | None = None

    @field_serializer("dt", when_used="unless-none")
    def dt_serializer(self, value, info: FieldSerializationInfo):
        print(f"mode_is_json={info.mode_is_json()}")
        return value

m = Model(dt=datetime(2020, 1, 1))
m.model_dump_json()

mode_is_json=True


'{"dt":"2020-01-01T00:00:00"}'

In [116]:
from typing import Any, Dict
from pydantic import BaseModel, Field, SerializationInfo, model_serializer
class SomeModel(BaseModel):
    lorem: str = Field(..., description="describe me")
    ipsum: int

    @model_serializer(when_used="always", mode="wrap")
    def clean_model(self, serializer: Any, info: SerializationInfo) -> Dict[str, Any]:
        data: Dict[str, Any] = serializer(self)
        return data

In [126]:
import json
from pydantic import BaseModel, field_serializer, field_validator
from typing import List
import numpy as np

class TestDataClass(BaseModel):
    id: int
    matrix: np.ndarray

    @field_serializer("matrix")   # Custom serializer for a field of a non-standard type (numpy array)
    def serialize_matrix(self, dt: np.ndarray, _info):
        ls = dt.tolist()
        dmp = json.dumps(ls)
        return dmp

    @field_validator("matrix", mode="before")  # Validator for a field of a non-standard type (numpy array), allowing
                                              # assigning it a value of type List (called in parse_raw)
    def check_matrix(cls, value):
        if isinstance(value, np.ndarray):
            return value
        elif isinstance(value, List):
            return np.array(value)
        elif isinstance(value, str):
            vl = json.loads(value)
            rv = np.array(vl)
            return rv
        else:
            raise ValueError("matrix may be numpy array or list")

    class Config:   # Allowing the use of a non-standard type (numpy array)
        arbitrary_types_allowed = True
to1 = TestDataClass(id=3802, matrix=np.array([[0, 0, 1], [0, 1, 0], [1, 0, 1]]))

to1js = to1.json()

print(json.dumps(to1js))

to2 = TestDataClass.parse_raw(to1js)

print(to2.stretch_id)

"{\"id\":3802,\"matrix\":\"[[0, 0, 1], [0, 1, 0], [1, 0, 1]]\"}"


AttributeError: 'TestDataClass' object has no attribute 'stretch_id'

We can easily write Python code to do this, using the pytzlibrary.

To complete this example, you'll need to make sure you have pytz installed in your virtual environment.

Let's write a simple Python function that will do the following, given a datetime object as an argument:

- if the datetime is naive, make it aware, and assume the naive datetime was already UTC
- if the datetime is aware, change it to be UTC



In [120]:
import pytz

def make_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        dt = pytz.utc.localize(dt)
    else:
        dt = dt.astimezone(pytz.utc)
    return dt
dt = make_utc(datetime.now())
dt

datetime.datetime(2024, 2, 23, 11, 34, 44, 982268, tzinfo=<UTC>)

In [121]:
def dt_utc_json_serializer(dt: datetime) -> str:
    dt = make_utc(dt)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

In [123]:
class Model(BaseModel):
    dt: datetime | None = None

    @field_serializer("dt", when_used="unless-none")
    def dt_serializer(self, dt, info: FieldSerializationInfo):
        if info.mode_is_json():
            return dt_utc_json_serializer(dt)
        return make_utc(dt)

m = Model(dt=datetime(2020, 1, 1))
m

Model(dt=datetime.datetime(2020, 1, 1, 0, 0))

- https://blog.csdn.net/weixin_43701894/article/details/132591483
- https://zenn.dev/nowa0402/articles/6b423af53274c2