In [36]:
import logging
from datetime import datetime
from typing import List, Optional
from uuid import uuid4

from pydantic import BaseModel, Field, ValidationError, validate_call
from typing_extensions import Annotated

# Module-level logger; later cells use it to report caught validation errors.
log = logging.getLogger(__name__)


Untrusted data can be passed to a model and, after parsing and validation, Pydantic guarantees that the fields of the resultant model instance will conform to the field types defined on the model.

In [2]:
# Define a Pydantic model: each annotated attribute becomes a validated field.
class InstrumentData(BaseModel):
    """Pydantic model with an integer id, an optional date, and two numeric lists.

    Attributes:
        instrument_id: Required integer identifier.
        date: Optional timestamp; ``None`` when omitted.
        x_data: List of integers; defaults to an empty list.
        y_data: List of floats; defaults to an empty list.
    """

    instrument_id: int
    # Optional[...] makes the annotation agree with the ``None`` default, so an
    # explicit ``date=None`` validates instead of being rejected.
    date: Optional[datetime] = None
    x_data: List[int] = []
    y_data: List[float] = []

The model can then be instantiated, and it comes with many helpful validation and serialization functions.

In [4]:
# Build a valid instance: every value already matches its declared field type.
data1 = InstrumentData(
    instrument_id=1,
    date=datetime.now(),
    x_data=[1, 2, 3],
    y_data=[1.1, 2.2, 3.3],
)

In [5]:
# Serialize the validated model to a JSON string (displayed as the cell output).
data1.model_dump_json()

'{"instrument_id":1,"date":"2024-06-04T12:02:59.223189","x_data":[1,2,3],"y_data":[1.1,2.2,3.3]}'

In [7]:
# y_data contains the string 'y', which cannot be parsed as a float, so
# validation fails. The error is caught and logged so that the notebook still
# runs to completion under "Restart Kernel -> Run All".
try:
    data2 = InstrumentData(
        instrument_id=2,
        date=datetime.now(),
        x_data=[4, 5, 6],
        y_data=[4.4, 'y', 6.6],
    )
except ValidationError as e:
    log.error(e)

ValidationError: 1 validation error for InstrumentData
y_data.1
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='y', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/float_parsing

If you want to validate raw input (for example a dictionary) yourself, you can do that and handle the resulting error like so:

In [16]:
# Validate a raw dict against the model. ``model_validate`` is the Pydantic v2
# replacement for the deprecated ``validate`` classmethod.
try:
    InstrumentData.model_validate({"x_data": [1, 2, 3], "y_data": [1.1, 2.2, 3.3]})
except ValidationError as e:
    # The input has no instrument_id, so validation fails; log the error
    # instead of letting it propagate.
    log.error(e)

1 validation error for InstrumentData
instrument_id
  Field required [type=missing, input_value={'x_data': [1, 2, 3], 'y_data': [1.1, 2.2, 3.3]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.7/v/missing


Default values and annotations can also be added to fields:

In [23]:
class InstrumentData(BaseModel):
    """Pydantic model whose string id is generated when the caller omits it.

    Attributes:
        instrument_id: String identifier; defaults to a fresh ``uuid4`` hex string.
        date: Optional timestamp; ``None`` when omitted.
        x_data: List of integers; defaults to an empty list.
        y_data: List of floats; defaults to an empty list.
    """

    # default_factory is called to produce the default when no id is passed.
    instrument_id: Annotated[str, Field(default_factory=lambda: uuid4().hex)]
    # Optional[...] makes the annotation agree with the ``None`` default, so an
    # explicit ``date=None`` validates instead of being rejected.
    date: Optional[datetime] = None
    x_data: List[int] = []
    y_data: List[float] = []

In [24]:
# No instrument_id is passed, so the model's default is used for it.
data3 = InstrumentData(
    date=datetime.now(),
    x_data=[1, 2, 3],
    y_data=[1.1, 2.2, 3.3],
)

In [28]:
# ``model_dump`` is the Pydantic v2 replacement for the deprecated ``dict``
# method; it returns the model's fields as a plain dictionary.
data3.model_dump()

{'instrument_id': '16f4c6f77eea45dcb139803a79458bb2',
 'date': datetime.datetime(2024, 6, 4, 12, 15, 38, 294040),
 'x_data': [1, 2, 3],
 'y_data': [1.1, 2.2, 3.3]}

Custom validation rules can be created that check incoming data during validation, for example range constraints declared with `Field`:

In [39]:
class InstrumentData(BaseModel):
    """Pydantic model that also range-checks a required temperature value.

    Attributes:
        instrument_id: String identifier; defaults to a fresh ``uuid4`` hex string.
        date: Optional timestamp; ``None`` when omitted.
        temperature: Required float constrained to ``-273.15 <= value <= 1000``.
        x_data: List of integers; defaults to an empty list.
        y_data: List of floats; defaults to an empty list.
    """

    # default_factory is called to produce the default when no id is passed.
    instrument_id: Annotated[str, Field(default_factory=lambda: uuid4().hex)]
    # Optional[...] makes the annotation agree with the ``None`` default, so an
    # explicit ``date=None`` validates instead of being rejected.
    date: Optional[datetime] = None
    # ge/le are inclusive bounds enforced during validation.
    temperature: Annotated[float, Field(ge=-273.15, le=1000)]
    x_data: List[int] = []
    y_data: List[float] = []


In [40]:
# temperature=1500 exceeds the field's upper bound of 1000, so validation
# fails. The error is caught and logged so that the notebook still runs to
# completion under "Restart Kernel -> Run All".
try:
    data4 = InstrumentData(
        date=datetime.now(),
        temperature=1500,
        x_data=[1, 2, 3],
        y_data=[1.1, 2.2, 3.3],
    )
except ValidationError as e:
    log.error(e)

ValidationError: 1 validation error for InstrumentData
temperature
  Input should be less than or equal to 1000 [type=less_than_equal, input_value=1500, input_type=int]
    For further information visit https://errors.pydantic.dev/2.7/v/less_than_equal

More complex validators can also be used to check and coerce data into the format you want it to be; see
https://docs.pydantic.dev/latest/concepts/validators/#ordering-of-validators-within-annotated


Python type hints alone are not enforced at runtime, so calling a function with the wrong argument types will not fail:

In [32]:
def add_nums(x: float, y: float) -> float:
    """Return the result of ``x + y``.

    The annotations are hints only; nothing in this function checks the
    argument types at call time.
    """
    total = x + y
    return total

In [33]:
# Strings are accepted despite the float annotations; ``+`` just concatenates them.
add_nums("hello", "person")

'helloperson'

With Pydantic, you can validate the parameters passed into a function based on their annotated types:

In [37]:
@validate_call
def add_nums(x: float, y: float) -> float:
    """Return the result of ``x + y``.

    ``validate_call`` validates the arguments against the annotations, so
    values that cannot be parsed as floats raise a ``ValidationError``.
    """
    total = x + y
    return total

In [38]:
# Neither string can be parsed as a float, so the validated function rejects
# both arguments. The error is caught and logged so that the notebook still
# runs to completion under "Restart Kernel -> Run All".
try:
    add_nums("hello", "person")
except ValidationError as e:
    log.error(e)

ValidationError: 2 validation errors for add_nums
0
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='hello', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/float_parsing
1
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='person', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/float_parsing