In [None]:
!pip install -q kor markdownify requests pydantic openai colorama bs4 rich

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m816.1/816.1 kB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.3/241.3 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━

In [None]:
from pydantic import BaseModel
from pydantic import ValidationError

class Person(BaseModel):
    """A person described by first name, last name and age."""

    first_name: str
    last_name: str
    age: int

    @property
    def display_name(self):
        """Return the first name followed by the initial of the last name.

        The initial is taken with the slice ``[:1]`` rather than the index
        ``[0]`` so that an empty ``last_name`` (which the ``str`` annotation
        accepts) yields an empty initial instead of raising ``IndexError``.
        """
        return f"{self.first_name} {self.last_name[:1]}"


p = Person(first_name="Evariste", last_name="Galois", age=20)

In [None]:
# A non-numeric string cannot be converted to the int field `age`, so the
# constructor raises ValidationError and the error report is printed.
try:
    Person(first_name="Evariste", last_name="Galois", age="twenty")
except ValidationError as ex:
    print(ex)

## Deserialization
Deserialization is the act of taking data (that can be provided in a number of ways) to create and populate a new model instance.

We already saw one way of doing this:

In [None]:
from pydantic import BaseModel, ValidationError


class Person(BaseModel):
    """A person described by first name, last name and age."""

    first_name: str
    last_name: str
    age: int

    @property
    def display_name(self):
        """Return the first name followed by the initial of the last name.

        The initial is taken with the slice ``[:1]`` rather than the index
        ``[0]`` so that an empty ``last_name`` (which the ``str`` annotation
        accepts) yields an empty initial instead of raising ``IndexError``.
        """
        return f"{self.first_name} {self.last_name[:1]}"

# Raw input for the model, written with keyword syntax; key order is unchanged.
data = dict(first_name="Isaac", last_name="Newton", age=84)

In [None]:
# Unpack the dictionary so that each key is passed as a keyword argument.
Person(**data)

Person(first_name='Isaac', last_name='Newton', age=84)

In [None]:
# model_validate receives the dictionary itself; no unpacking is needed.
p = Person.model_validate(data)
p

Person(first_name='Isaac', last_name='Newton', age=84)

The same validation exceptions we saw earlier will be raised when we have validation issues:

In [None]:
# Only last_name is supplied, so first_name and age are both absent.
missing_data = dict(last_name="Newton")

try:
    Person.model_validate(missing_data)
except ValidationError as err:
    print(err)

2 validation errors for Person
first_name
  Field required [type=missing, input_value={'last_name': 'Newton'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing
age
  Field required [type=missing, input_value={'last_name': 'Newton'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing


## PROJECT

In [None]:
# Python dictionary
from datetime import date

# Raw input as a plain dictionary; the date is still an ISO-formatted string.
data = dict(
    manufacturer="BMW",
    series_name="M4",
    type_="Convertible",
    is_electric=False,
    manufactured_date="2023-01-01",
    base_msrp_usd=93_300,
    vin="1234567890",
    number_of_doors=2,
    registration_country="France",
    license_plate="AAA-BBB",
)

# Expected dump for the dictionary input: same keys, with the date string
# replaced by a datetime.date object.
data_expected_serialization = dict(
    manufacturer='BMW',
    series_name='M4',
    type_='Convertible',
    is_electric=False,
    manufactured_date=date(2023, 1, 1),
    base_msrp_usd=93_300,
    vin='1234567890',
    number_of_doors=2,
    registration_country='France',
    license_plate='AAA-BBB',
)


# JSON
# JSON payload that omits is_electric, number_of_doors, registration_country
# and license_plate.
data_json = '''
{
    "manufacturer": "BMW",
    "series_name": "M4",
    "type_": "Convertible",
    "manufactured_date": "2023-01-01",
    "base_msrp_usd": 93300,
    "vin": "1234567890"
}
'''

# Expected dump for the JSON input: keys absent from the payload appear with
# the values False, 4 and None.
data_json_expected_serialization = dict(
    manufacturer='BMW',
    series_name='M4',
    type_='Convertible',
    is_electric=False,
    manufactured_date=date(2023, 1, 1),
    base_msrp_usd=93_300,
    vin='1234567890',
    number_of_doors=4,
    registration_country=None,
    license_plate=None,
)

In [None]:
from datetime import date
from pydantic import BaseModel


class Automobile(BaseModel):
    """Model describing a single automobile.

    Fields without a default must be supplied; the remaining fields fall back
    to the defaults declared below.
    """

    manufacturer: str
    series_name: str
    type_: str
    is_electric: bool = False  # default when the key is absent
    manufactured_date: date
    base_msrp_usd: float
    vin: str
    number_of_doors: int = 4  # default when the key is absent
    registration_country: str | None = None  # None when not provided
    license_plate: str | None = None  # None when not provided

In [None]:
# Build the model from the Python dictionary defined above.
car = Automobile.model_validate(data)
car

Automobile(manufacturer='BMW', series_name='M4', type_='Convertible', is_electric=False, manufactured_date=datetime.date(2023, 1, 1), base_msrp_usd=93300.0, vin='1234567890', number_of_doors=2, registration_country='France', license_plate='AAA-BBB')

In [None]:
# base_msrp_usd is stored as the float 93300.0, which compares equal to the
# int 93_300 in the expected dictionary.
assert car.model_dump() == data_expected_serialization

And using JSON deserialization:

In [None]:
# Build the model directly from the JSON text.
car = Automobile.model_validate_json(data_json)
car

Automobile(manufacturer='BMW', series_name='M4', type_='Convertible', is_electric=False, manufactured_date=datetime.date(2023, 1, 1), base_msrp_usd=93300.0, vin='1234567890', number_of_doors=4, registration_country=None, license_plate=None)

In [None]:
# The fields missing from the JSON payload are expected to carry their defaults.
assert car.model_dump() == data_json_expected_serialization


## MODEL CONFIGURATION

## Handling Extra Fields

In [15]:
from pydantic import BaseModel, ConfigDict, ValidationError

class Model(BaseModel):
    """Model with one integer field whose config ignores extra input keys."""

    # extra="ignore": input keys that are not declared fields are not kept.
    model_config = ConfigDict(extra="ignore")

    field_1: int = 0

# extra_1 is not a declared field; with extra="ignore" this call prints nothing.
try:
    Model(field_1=10, extra_1="data")
except ValidationError as ex:
    print(ex)

In [16]:
# The extra keyword does not end up on the instance, as the dict view shows.
m = Model(field_1=10, extra_1="data")

dict(m)

{'field_1': 10}

In [17]:
# Read the field definitions from the class: accessing model_fields through an
# instance is deprecated in newer Pydantic releases.
type(m).model_fields

{'field_1': FieldInfo(annotation=int, required=False, default=0)}

## Strict and Lax Type Coercion
As we saw earlier, Pydantic performs lax type coercion by default.

In [18]:
class Model(BaseModel):
    """Four differently typed fields used to probe type coercion."""

    field_1: str
    field_2: float
    field_3: list
    field_4: tuple


# field_1 receives an int and field_2 receives an int for a float field.
try:
    Model(
        field_1=100,
        field_2=1,
        field_3=[1, 2, 3],
        field_4=(1, 2, 3),
    )
except ValidationError as err:
    print(err)

1 validation error for Model
field_1
  Input should be a valid string [type=string_type, input_value=100, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/string_type


## Let's explore an example:

In [20]:
# JSON document with seven fields covering a boolean, numbers, null, an array,
# an object and a nested array.
json_data = '''
{
    "field_1": true,
    "field_2": 10.5,
    "field_3": 10,
    "field_4": null,
    "field_5": [1, 2, 3],
    "field_6": {
        "a": 1,
        "b": 2,
        "c": [3, 4, 5]
    },
    "field_7": [
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]
    ]
}
'''

In [21]:
import json
from pprint import pprint

# Parse the JSON text into Python objects and pretty-print the result.
data = json.loads(json_data)
pprint(data)

{'field_1': True,
 'field_2': 10.5,
 'field_3': 10,
 'field_4': None,
 'field_5': [1, 2, 3],
 'field_6': {'a': 1, 'b': 2, 'c': [3, 4, 5]},
 'field_7': [[1, 0, 0], [0, 1, 0], [0, 0, 1]]}


In [22]:
class Model(BaseModel):
    """Model whose field types are matched against the keys of json_data."""

    # model_config = ConfigDict(strict=True)

    field_1: bool
    field_2: float
    field_3: int
    field_4: str | None
    field_5: tuple[int, ...]
    field_6: set[str]  # json_data supplies an object for this key
    field_7: dict  # json_data supplies an array of arrays for this key

In [23]:
# Validate the raw JSON text and report any mismatches instead of raising.
try:
    Model.model_validate_json(json_data)
except ValidationError as err:
    print(err)

2 validation errors for Model
field_6
  Input should be a valid array [type=set_type, input_value={'a': 1, 'b': 2, 'c': [3, 4, 5]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/set_type
field_7
  Input should be an object [type=dict_type, input_value=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], input_type=list]
    For further information visit https://errors.pydantic.dev/2.6/v/dict_type


## Mutability
By default, Pydantic models are mutable, i.e. we can modify the values in a model instance after the model instance has been created.

In [24]:
class Model(BaseModel):
    """Minimal model with a single integer field."""

    field: int

# Instance that the next cell mutates.
m = Model(field=10)

In [25]:
# Re-assign the attribute on the existing instance; any validation error that
# the assignment raises is printed.
try:
    m.field = 20
except ValidationError as err:
    print(err)

## Project

In [26]:
from datetime import date
from pydantic import BaseModel


class Automobile(BaseModel):
    """Model describing a single automobile.

    Fields without a default must be supplied; the remaining fields fall back
    to the defaults declared below.
    """

    manufacturer: str
    series_name: str
    type_: str
    is_electric: bool = False  # default when the key is absent
    manufactured_date: date
    base_msrp_usd: float
    vin: str
    number_of_doors: int = 4  # default when the key is absent
    registration_country: str | None = None  # None when not provided
    license_plate: str | None = None  # None when not provided

In [27]:
from enum import Enum

class AutomobileType(Enum):
    """Body styles an automobile can have; each value is the display string."""

    sedan = "Sedan"
    coupe = "Coupe"
    convertible = "Convertible"
    suv = "SUV"
    truck = "Truck"

In [28]:
# JSON payload in which manufacturer, series_name and vin carry leading and
# trailing spaces.
data_json = '''
{
    "manufacturer": " BMW ",
    "series_name": " M4 ",
    "type_": "Convertible",
    "manufactured_date": "2023-01-01",
    "base_msrp_usd": 93300,
    "vin": " 1234567890 "
}
'''

In [29]:
# Expected dump for the padded JSON payload: strings without the surrounding
# spaces and type_ as an AutomobileType member.
data_json_expected_serialization = dict(
    manufacturer='BMW',
    series_name='M4',
    type_=AutomobileType.convertible,
    is_electric=False,
    manufactured_date=date(2023, 1, 1),
    base_msrp_usd=93300.0,
    vin='1234567890',
    number_of_doors=4,
    registration_country=None,
    license_plate=None,
)

## Modified class

In [30]:
from pydantic import ConfigDict


class Automobile(BaseModel):
    """Automobile model with explicit configuration and an enum-typed ``type_``."""

    model_config = ConfigDict(
        extra="forbid",  # input keys that are not declared fields are rejected
        str_strip_whitespace=True,  # surrounding whitespace is removed from strings
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # attribute assignments are validated
    )

    manufacturer: str
    series_name: str
    type_: AutomobileType  # must match one of the AutomobileType values
    is_electric: bool = False
    manufactured_date: date
    base_msrp_usd: float
    vin: str
    number_of_doors: int = 4
    registration_country: str | None = None
    license_plate: str | None = None

In [31]:
# Build the configured model from the JSON text with padded strings.
car = Automobile.model_validate_json(data_json)
car

Automobile(manufacturer='BMW', series_name='M4', type_=<AutomobileType.convertible: 'Convertible'>, is_electric=False, manufactured_date=datetime.date(2023, 1, 1), base_msrp_usd=93300.0, vin='1234567890', number_of_doors=4, registration_country=None, license_plate=None)

In [32]:
# The expected dictionary holds the stripped strings and the AutomobileType
# member for type_.
assert car.model_dump() == data_json_expected_serialization