In [1]:
%%writefile requirements.txt

pydantic

Overwriting requirements.txt


In [2]:
!pip install -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Pydantic Project

We are going to build a model for an automobile. 

Throughout the course, at the end of each section you will add on to this model, refactor some parts of it, or create related models that will then be used in conjunction with this automobile model when we get to model composition.

To start, you should create an `Automobile` model that contains the following fields:
- `manufacturer`, string, required, not nullable
- `series_name`, string, required, not nullable
- `type_`, string, required, not nullable
- `is_electric`, boolean, defaults to `False`, not nullable
- `manufactured_date`, date, required (hint: use `date` from the `datetime` module as your field type hint), not nullable
- `base_msrp_usd`, float, required, not nullable
- `vin`, string, required, not nullable
- `number_of_doors`, integer, defaults to `4`, not nullable
- `registration_country`, string, defaults to `None`
- `license_plate`, string, defaults to `None`

Once you have created your model, you should test deserializing and serializing your model and make sure everything works.

In [3]:
from pydantic import BaseModel
from datetime import date

# First iteration of the automobile model: plain type hints only.
# A field without a default is required; a field typed `... | None`
# accepts None and defaults to it.
class Automobile(BaseModel):
    manufacturer: str
    series_name: str
    type_: str
    # Optional on input; falls back to False when omitted.
    is_electric: bool = False
    manufactured_date: date
    base_msrp_usd: float
    vin: str
    # Optional on input; falls back to 4 when omitted.
    number_of_doors: int = 4
    # Nullable registration details.
    registration_country: str | None = None
    license_plate: str | None = None
    

You can test your model by deserializing the following input data, and comparing the serialization of each of those models to the provided Python dictionaries.

In other words, test them by doing something like this for both cases:
- create model by deserializing the data
- check the model's serialization to `dict` is equal to the provided expected dictionary

In [4]:
# Python dictionary

data = {
    "manufacturer": "BMW",
    "series_name": "M4",
    "type_": "Convertible",
    "is_electric": False,
    "manufactured_date": "2023-01-01",
    "base_msrp_usd": 93_300,
    "vin": "1234567890",
    "number_of_doors": 2,
    "registration_country": "France",
    "license_plate": "AAA-BBB",
}

data_expected_serialization = {
    'manufacturer': 'BMW',
    'series_name': 'M4',
    'type_': 'Convertible',
    'is_electric': False,
    'manufactured_date': date(2023,1,1),
    'base_msrp_usd': 93_300,
    'vin': '1234567890',
    'number_of_doors': 2,
    'registration_country': 'France',
    'license_plate': 'AAA-BBB',
}

In [5]:
# JSON
data_json = '''
{
    "manufacturer": "BMW",
    "series_name": "M4",
    "type_": "Convertible",
    "manufactured_date": "2023-01-01",
    "base_msrp_usd": 93300,
    "vin": "1234567890"
}
'''

data_json_expected_serialization = {
    'manufacturer': 'BMW',
    'series_name': 'M4',
    'type_': 'Convertible',
    'is_electric': False,
    'manufactured_date': date(2023, 1, 1),
    'base_msrp_usd': 93_300,
    'vin': '1234567890',
    'number_of_doors': 4,
    'registration_country': None,
    'license_plate': None,
}

In [6]:
auto1 = Automobile.model_validate(data)

In [7]:
assert auto1.model_dump() ==  data_expected_serialization

In [8]:
auto2 = Automobile.model_validate_json(data_json)

In [9]:
assert auto2.model_dump() ==  data_json_expected_serialization

In [10]:
from pydantic import ValidationError

# Both cases below MUST raise. The `else` branches turn an unexpected
# successful validation into a failure instead of a silent pass.

# Case 1: an int that pydantic cannot interpret as a boolean.
data_invalid = data.copy()
data_invalid['is_electric'] = 100
try:
    Automobile.model_validate(data_invalid)
except ValidationError as e:
    print(e)
    assert "Input should be a valid boolean" in str(e)
else:
    raise AssertionError("expected a ValidationError for is_electric=100")

# Case 2: a required field is missing from the input.
data_invalid = data.copy()
del data_invalid['manufacturer']
try:
    Automobile.model_validate(data_invalid)
except ValidationError as e:
    print(e)
    assert "Field required" in str(e)
else:
    raise AssertionError("expected a ValidationError for a missing 'manufacturer'")
    


1 validation error for Automobile
is_electric
  Input should be a valid boolean, unable to interpret input [type=bool_parsing, input_value=100, input_type=int]
    For further information visit https://errors.pydantic.dev/2.10/v/bool_parsing
1 validation error for Automobile
manufacturer
  Field required [type=missing, input_value={'series_name': 'M4', 'ty...cense_plate': 'AAA-BBB'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/missing


Modify your `Automobile` model to implement the following:
- set model to forbid extra fields
- set model to strip whitespace from all string fields
- set model to validate defaults and assignments
- use the enum provided below for the `type_` field

In [11]:
from enum import Enum

# Allowed body styles for an automobile. The member *values* (e.g. "Sedan")
# are the strings that appear in the source data; the member names are the
# Python-side identifiers.
class AutomobileType(Enum):
    sedan = "Sedan"
    coupe = "Coupe"
    convertible = "Convertible"
    suv = "SUV"
    truck = "Truck"

You can test your model by serializing and deserializing this data:

In [12]:
data_json = '''
{
    "manufacturer": " BMW ",
    "series_name": " M4 ",
    "type_": "Convertible",
    "manufactured_date": "2023-01-01",
    "base_msrp_usd": 93300,
    "vin": " 1234567890 "
}
'''

data_json_expected_serialization = {
    'manufacturer': 'BMW',
    'series_name': 'M4',
    'type_': AutomobileType.convertible,
    'is_electric': False,
    'manufactured_date': date(2023, 1, 1),
    'base_msrp_usd': 93300.0,
    'vin': '1234567890',
    'number_of_doors': 4,
    'registration_country': None,
    'license_plate': None
}

In [13]:
from pydantic import ConfigDict

# Second iteration: same fields as before, plus model-wide configuration
# and an enum-typed `type_` field.
class Automobile(BaseModel):
    model_config = ConfigDict(
        extra="forbid",             # unknown input keys are rejected
        str_strip_whitespace=True,  # surrounding whitespace is stripped from strings
        validate_default=True,      # default values are validated too
        validate_assignment=True,   # attribute assignment is validated too
    )

    manufacturer: str
    series_name: str
    type_: AutomobileType
    is_electric: bool = False
    manufactured_date: date
    base_msrp_usd: float
    vin: str
    number_of_doors: int = 4
    registration_country: str | None = None
    license_plate: str | None = None

In [14]:
m = Automobile.model_validate_json(data_json)
assert m.model_dump() == data_json_expected_serialization 

In [15]:
from pydantic import ValidationError

# An unknown key must be rejected because the model forbids extra fields.
# The `else` branch makes the cell fail if validation unexpectedly succeeds.
data_invalid = data.copy()
data_invalid['extra_field'] = 'extra'
try:
    Automobile.model_validate(data_invalid)
except ValidationError as e:
    print(e)
    assert "Extra inputs are not permitted" in str(e)
else:
    raise AssertionError("expected a ValidationError for the unexpected 'extra_field' key")

1 validation error for Automobile
extra_field
  Extra inputs are not permitted [type=extra_forbidden, input_value='extra', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/extra_forbidden


Modify your `Automobile` model to implement the following:

- auto generate camel case aliases
- the field `type_` in our model is provided as `type` in source data, and should also serialize to `type`.
- the data we receive contains the following field names that need to map to our own model field names - but we still want our camelized field names to be used for serialization. Account for that (without renaming the field names):
    - `number_of_doors` is provided as `doors`
    - `manufactured_date` is provided as `completionDate`
- The field `base_msrp_usd` is provided as `msrpUSD`, and we want the serialization name to be `baseMSRPUSD`
- we want the JSON serialized output of `manufactured_date` to be this pattern: `YYYY/MM/DD` (e.g. `2020/01/01`), but serializing to a Python dict should remain as a `date` object.

The following source data:

In [16]:
data_json = '''
{
    "manufacturer": "BMW",
    "seriesName": "M4",
    "type": "Convertible",
    "isElectric": false,
    "completionDate": "2023-01-01",
    "msrpUSD": 93300,
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB"
}
'''

should deserialize into a model that serializes to these:

In [17]:
expected_serialized_dict = {
    'manufacturer': 'BMW',
    'series_name': 'M4',
    'type_': AutomobileType.convertible,
    'is_electric': False,
    'manufactured_date': date(2023, 1, 1),
    'base_msrp_usd': 93300.0,
    'vin': '1234567890',
    'number_of_doors': 2,
    'registration_country': 'France',
    'license_plate': 'AAA-BBB'
}

expected_serialized_dict_by_alias = {
    'manufacturer': 'BMW',
    'seriesName': 'M4',
    'type': AutomobileType.convertible,
    'isElectric': False,
    'manufacturedDate': date(2023, 1, 1),
    'baseMSRPUSD': 93300.0,
    'vin': '1234567890',
    'numberOfDoors': 2,
    'registrationCountry': 'France',
    'licensePlate': 'AAA-BBB'
}

expected_serialized_json_by_alias = (
    '{"manufacturer":"BMW","seriesName":"M4","type":"Convertible",'
    '"isElectric":false,"manufacturedDate":"2023/01/01","baseMSRPUSD":93300.0,'
    '"vin":"1234567890","numberOfDoors":2,"registrationCountry":"France",'
    '"licensePlate":"AAA-BBB"}'
)

In [18]:
from pydantic.alias_generators import to_camel
from pydantic import Field, field_serializer
# Third iteration: camelCase aliases are generated for every field, with
# per-field overrides where the source data uses a different name.
class Automobile(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
        alias_generator=to_camel,
    )

    manufacturer: str
    series_name: str
    # `type` is the name used both when reading and when writing by alias.
    type_: AutomobileType = Field(alias="type")
    is_electric: bool = False
    # Read from `completionDate`; only the validation alias is overridden,
    # so output by alias still uses the generated camelCase name.
    manufactured_date: date = Field(validation_alias="completionDate")
    # Read from `msrpUSD`, written as `baseMSRPUSD`.
    base_msrp_usd: float = Field(
        validation_alias="msrpUSD",
        serialization_alias="baseMSRPUSD",
    )
    vin: str
    # Read from `doors`.
    number_of_doors: int = Field(default=4, validation_alias="doors")
    registration_country: str | None = None
    license_plate: str | None = None

    @field_serializer("manufactured_date", when_used="json-unless-none")
    def serialize_manufactured_date(self, value: date):
        # Applies to JSON output only: render the date as YYYY/MM/DD.
        return f"{value:%Y/%m/%d}"

In [19]:
m = Automobile.model_validate_json(data_json)

In [20]:
assert m.model_dump() == expected_serialized_dict

In [21]:
assert m.model_dump(by_alias=True) == expected_serialized_dict_by_alias

In [22]:
assert m.model_dump_json(by_alias=True) == expected_serialized_json_by_alias

Modify your `Automobile` model to implement the following:
- add a field named `id_`
    - make it the **first** field in your model
    - if provided in the data, it will be named `id`
    - it should serialize to `id`
    - the field type should be a uuid4
    - for now, have it default to `None`

You can use this data to test your model:

In [23]:
from uuid import UUID

data = {
    "id": "c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7",
    "manufacturer": "BMW",
    "seriesName": "M4",
    "type": "Convertible",
    "isElectric": False,
    "completionDate": "2023-01-01",
    "msrpUSD": 93_300,
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB"
}

expected_serialized_by_alias = {
    'id': UUID('c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7'),
    'manufacturer': 'BMW',
    'seriesName': 'M4',
    'type': AutomobileType.convertible,
    'isElectric': False,
    'manufacturedDate': date(2023, 1, 1),
    'baseMSRPUSD': 93300.0,
    'vin': '1234567890',
    'numberOfDoors': 2,
    'registrationCountry': 'France',
    'licensePlate': 'AAA-BBB'
}

data_no_id = {
    "manufacturer": "BMW",
    "seriesName": "M4",
    "type": "Convertible",
    "isElectric": False,
    "completionDate": "2023-01-01",
    "msrpUSD": 93_300,
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB"
}

expected_serialization_data_no_id_by_alias = {
    'id': None,
    'manufacturer': 'BMW',
    'seriesName': 'M4',
    'type': AutomobileType.convertible,
    'isElectric': False,
    'manufacturedDate': date(2023, 1, 1),
    'baseMSRPUSD': 93300.0,
    'vin': '1234567890',
    'numberOfDoors': 2,
    'registrationCountry': 'France',
    'licensePlate': 'AAA-BBB'
}

In [24]:
from pydantic import UUID4
# Fourth iteration: adds an optional uuid4 identifier as the first field.
class Automobile(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
        alias_generator=to_camel,
    )

    # Exposed as `id` in both directions; None when the input omits it.
    id_: UUID4 | None = Field(alias="id", default=None)
    manufacturer: str
    series_name: str
    type_: AutomobileType = Field(alias="type")
    is_electric: bool = False
    # Read from `completionDate`; only the validation alias is overridden.
    manufactured_date: date = Field(validation_alias="completionDate")
    # Read from `msrpUSD`, written as `baseMSRPUSD`.
    base_msrp_usd: float = Field(
        validation_alias="msrpUSD",
        serialization_alias="baseMSRPUSD",
    )
    vin: str
    # Read from `doors`.
    number_of_doors: int = Field(default=4, validation_alias="doors")
    registration_country: str | None = None
    license_plate: str | None = None

    @field_serializer("manufactured_date", when_used="json-unless-none")
    def serialize_manufactured_date(self, value: date):
        # Applies to JSON output only: render the date as YYYY/MM/DD.
        return f"{value:%Y/%m/%d}"

In [25]:
m = Automobile.model_validate(data)

In [26]:
assert m.model_dump(by_alias=True) == expected_serialized_by_alias


In [27]:
m2 = Automobile.model_validate(data_no_id)

In [28]:
assert m2.model_dump(by_alias=True) == expected_serialization_data_no_id_by_alias


Modify your `Automobile` model to implement the following:
- constrain the manufactured date to be no earlier than `1980-01-01` (Hint: how did you constrain numbers using `Field`? works the same with dates)
- number of doors should be constrained to be a min of 2, a max of 4, and a multiple of 2 (so 2 doors, or 4 doors)
- change `id` to no longer be nullable, and provide a uuid4 as a default (make sure the default is not always the same when creating multiple models)

Test data:

In [29]:
data = {
    "id": "c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7",
    "manufacturer": "BMW",
    "seriesName": "M4",
    "type": "Convertible",
    "isElectric": False,
    "completionDate": "2023-01-01",
    "msrpUSD": 93_300,
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB"
}

Expected serialization to dict by alias is:

In [30]:
from uuid import UUID

expected_serialized_by_alias = {
    'id': UUID('c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7'),
    'manufacturer': 'BMW',
    'seriesName': 'M4',
    'type': AutomobileType.convertible,
    'isElectric': False,
    'manufacturedDate': date(2023, 1, 1),
    'baseMSRPUSD': 93300.0,
    'vin': '1234567890',
    'numberOfDoors': 2,
    'registrationCountry': 'France',
    'licensePlate': 'AAA-BBB'
}

In [31]:
data_no_id = {
    "manufacturer": "BMW",
    "seriesName": "M4",
    "type": "Convertible",
    "isElectric": False,
    "completionDate": "2023-01-01",
    "msrpUSD": 93_300,
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB"
}

In [32]:
# NOTE(review): this expectation looks stale for this section. The model
# defined below makes `id` non-nullable and gives it a freshly generated
# uuid4 default, so a model built from `data_no_id` will not serialize with
# 'id': None. None of the visible tests in this section compare against this
# dict; they only check that two generated ids differ.
expected_serialization_data_no_id_by_alias = {
    'id': None,
    'manufacturer': 'BMW',
    'seriesName': 'M4',
    'type': AutomobileType.convertible,
    'isElectric': False,
    'manufacturedDate': date(2023, 1, 1),
    'baseMSRPUSD': 93300.0,
    'vin': '1234567890',
    'numberOfDoors': 2,
    'registrationCountry': 'France',
    'licensePlate': 'AAA-BBB'
}

In [33]:
from uuid import uuid4

# Fifth iteration: adds value constraints and a generated, non-nullable id.
class Automobile(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
        alias_generator=to_camel,
    )

    # A new uuid4 is produced per instance when the input omits `id`.
    id_: UUID4 = Field(alias="id", default_factory=uuid4)
    manufacturer: str
    series_name: str
    type_: AutomobileType = Field(alias="type")
    is_electric: bool = False
    # Read from `completionDate`; must not be earlier than 1980-01-01.
    manufactured_date: date = Field(
        validation_alias="completionDate",
        ge=date(1980, 1, 1),
    )
    # Read from `msrpUSD`, written as `baseMSRPUSD`.
    base_msrp_usd: float = Field(
        validation_alias="msrpUSD",
        serialization_alias="baseMSRPUSD",
    )
    vin: str
    # Read from `doors`; between 2 and 4 and a multiple of 2, i.e. 2 or 4.
    number_of_doors: int = Field(
        default=4,
        validation_alias="doors",
        ge=2,
        le=4,
        multiple_of=2,
    )
    registration_country: str | None = None
    license_plate: str | None = None

    @field_serializer("manufactured_date", when_used="json-unless-none")
    def serialize_date(self, value: date) -> str:
        # Applies to JSON output only: render the date as YYYY/MM/DD.
        return f"{value:%Y/%m/%d}"

Tests

In [34]:
m = Automobile.model_validate(data)
assert m.model_dump(by_alias=True) == expected_serialized_by_alias

In [35]:
m = Automobile.model_validate(data_no_id)
m2 = Automobile.model_validate(data_no_id)
assert m.id_ != m2.id_

Create an annotated type, named `BoundedString` to define a string that has a minimum of 2 characters, and no more than 50 characters.

Modify your `Automobile` model to use this annotated type for the following fields:
- `manufacturer`
- `series_name`
- `vin`
- `registration_country`
- `license_plate`

Create an annotated type, called `BoundedList` that uses a type variable to define a list of elements with a minimum of `1` element and a maximum of `5` elements.

Using this annotated type, add a new field to the model as follows:
- field name should be `top_features`
- place it just before the `vin` field
- it should both deserialize from and serialize to `topFeatures`
- it should be a bounded list of strings, which themselves should be bounded to a minimum of `2` chars, and no more than `50`. (Hint: use the `BoundedString` type you create as the type when you define the field type in your model with `BoundedList`)
- make it optional, with a default of `None`

Use this data to test your model:

In [36]:
from uuid import UUID

data = {
    "id": "c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7",
    "manufacturer": "BMW",
    "seriesName": "M4 Competition xDrive",
    "type": "Convertible",
    "isElectric": False,
    "completionDate": "2023-01-01",
    "msrpUSD": 93_300,
    "topFeatures": ["6 cylinders", "all-wheel drive", "convertible"],
    "vin": "1234567890",
    "doors": 2,
    "registrationCountry": "France",
    "licensePlate": "AAA-BBB"
}

expected_serialized_by_alias = {
    'id': UUID('c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7'),
    'manufacturer': 'BMW',
    'seriesName': 'M4 Competition xDrive',
    'type': AutomobileType.convertible,
    'isElectric': False,
    'manufacturedDate': date(2023, 1, 1),
    'baseMSRPUSD': 93300.0,
    'topFeatures': ['6 cylinders', 'all-wheel drive', 'convertible'],
    'vin': '1234567890',
    'numberOfDoors': 2,
    'registrationCountry': 'France',
    'licensePlate': 'AAA-BBB'
}

In [37]:
from typing import Annotated, TypeVar
from pydantic import Field

# A string constrained to a length of at least 2 and at most 50 characters.
BoundedString = Annotated[str, Field(min_length=2, max_length=50)]

# Type variable standing in for the element type of BoundedList.
T = TypeVar('T')

# A list holding at least 1 and at most 5 elements of type T. Subscript it to
# choose the element type, e.g. BoundedList[BoundedString].
BoundedList = Annotated[list[T], Field(min_length=1, max_length=5)]

In [38]:
from uuid import uuid4

# Sixth iteration: string fields use the BoundedString annotated type and a
# new optional `top_features` list is added just before `vin`.
class Automobile(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
        alias_generator=to_camel,
    )

    # A new uuid4 is produced per instance when the input omits `id`.
    id_: UUID4 = Field(alias="id", default_factory=uuid4)
    manufacturer: BoundedString
    series_name: BoundedString
    type_: AutomobileType = Field(alias="type")
    is_electric: bool = False
    # Read from `completionDate`; must not be earlier than 1980-01-01.
    manufactured_date: date = Field(
        validation_alias="completionDate",
        ge=date(1980, 1, 1),
    )
    # Read from `msrpUSD`, written as `baseMSRPUSD`.
    base_msrp_usd: float = Field(
        validation_alias="msrpUSD",
        serialization_alias="baseMSRPUSD",
    )
    # Optional; when present, a bounded list whose items are bounded strings.
    top_features: BoundedList[BoundedString] | None = None
    vin: BoundedString
    # Read from `doors`; between 2 and 4 and a multiple of 2, i.e. 2 or 4.
    number_of_doors: int = Field(
        default=4,
        validation_alias="doors",
        ge=2,
        le=4,
        multiple_of=2,
    )
    registration_country: BoundedString | None = None
    license_plate: BoundedString | None = None

    @field_serializer("manufactured_date", when_used="json-unless-none")
    def serialize_date(self, value: date) -> str:
        # Applies to JSON output only: render the date as YYYY/MM/DD.
        return f"{value:%Y/%m/%d}"

In [39]:
car = Automobile.model_validate(data)
car

Automobile(id_=UUID('c4e60f4a-3c7f-4da5-9b3f-07aee50b23e7'), manufacturer='BMW', series_name='M4 Competition xDrive', type_=<AutomobileType.convertible: 'Convertible'>, is_electric=False, manufactured_date=datetime.date(2023, 1, 1), base_msrp_usd=93300.0, top_features=['6 cylinders', 'all-wheel drive', 'convertible'], vin='1234567890', number_of_doors=2, registration_country='France', license_plate='AAA-BBB')

In [40]:
assert car.model_dump(by_alias=True) == expected_serialized_by_alias