https://www.youtube.com/watch?v=gb3arRysqMo

In [1]:
from pydantic import BaseModel, ValidationError


In [2]:
# Inherets from BaseModel
class Person(BaseModel):
    first_name: str # class attribute:python type hint
    last_name: str
    age: int

In [3]:
# instantiate class
p = Person(first_name='Isaac', last_name='Newton', age=84)
print(p)

first_name='Isaac' last_name='Newton' age=84


In [4]:
# class casts to type specified. Guaranties type is cast otherwise throws error
p = Person(first_name= 100, last_name= 200, age='84')
print(p)

first_name='100' last_name='200' age=84


In [5]:
try:
    Person(first_name='Isaac')
except ValidationError as ex:
    print(ex)

2 validation errors for Person
last_name
  field required (type=value_error.missing)
age
  field required (type=value_error.missing)


In [6]:
# Optional Attributes for Python >= 3.10

# Inherets from BaseModel
class Person(BaseModel):
    first_name: str # class attribute:python type hint
    last_name: str
    age: int | None

In [7]:
p = Person(first_name= 100, last_name= 200)
print(p)

first_name='100' last_name='200' age=None


In [9]:
# Optional Attributes for Python >= 3.7
from typing import Optional

# Inherets from BaseModel
class Person(BaseModel):
    first_name: str # class attribute:python type hint
    last_name: str
    age: Optional[int]

In [10]:
p = Person(first_name= 100, last_name= 200)
print(p)

first_name='100' last_name='200' age=None


In [11]:
# Default Values
class Person(BaseModel):
    first_name: str = None
    last_name: str = 'Smith'
    age: Optional[int]

In [12]:
p=Person()
print(p)

first_name=None last_name='Smith' age=None


In [14]:
# Serialize / Transform into Dictionary or JSON
# Dict
print(p.dict())
# JSON
print(p.json())

{'first_name': None, 'last_name': 'Smith', 'age': None}
{"first_name": null, "last_name": "Smith", "age": null}


In [17]:
# exclude unwanted attributes by specifying a set of strings
print(p.dict(exclude={'first_name', 'age'}))
# inlude
print(p.json(include={'first_name', 'age'}, indent=4))

{'last_name': 'Smith'}
{
    "first_name": null,
    "age": null
}


In [18]:
# Deserialize. schema has to match
from datetime import date

class Person(BaseModel):
    first_name: str = None
    last_name: str = 'Smith'
    age: Optional[int]
    dob : date

data = {
    "first_name":"Isaac",
    "last_name":"Newton",
    "dob":date(1994,1,2)
}

p = Person.parse_obj(data)

print(p)

first_name='Isaac' last_name='Newton' age=None dob=datetime.date(1994, 1, 2)


In [20]:
# works with multiple date formats
data = '''
{
    "first_name":"Isaac",
    "last_name":"Newton",
    "dob":"1958-1-3"
}
'''
p = Person.parse_raw(data)

print(p)

first_name='Isaac' last_name='Newton' age=None dob=datetime.date(1958, 1, 3)


In [21]:
# This example will show how to comply with the JSON naming convention, i.e. camelCase
from pydantic import Field

class Person(BaseModel):
    first_name: str = Field(default=None, alias='firstName') # Field is an instance
    last_name: str = Field(alias='lastName')
    age: int | None
    dob : date

In [23]:
# Pydantic will look for the alias name
try:
    p = Person(firstName="Mike", age=68, dob='1995-1-10')
except ValidationError as ex:
    print(ex.json())

[
  {
    "loc": [
      "lastName"
    ],
    "msg": "field required",
    "type": "value_error.missing"
  }
]


In [33]:
# Using a mixture of field names and aliases

class Person(BaseModel):
    first_name : str = Field(alias='firstName', default=None)
    last_name : str = Field(alias='lastName')
    age : int
    dob : date

    # our custom configuration
    class Config:
        allow_population_by_field_name = True

p = Person(firstName="Isam", last_name="Burk", age=89, dob="2022-01-01")
print(p)

first_name='Isam' last_name='Burk' age=89 dob=datetime.date(2022, 1, 1)


In [40]:
 # The alias is helpful when the source attribute names might not match the class


data = {
    "firstName":"Isaac",
    "lastName":"Newton",
    "age":5,
    "dob":"1958-1-3"
}

p = Person.parse_obj(data)
print(p)

data2 = '''
{
    "first_name":"Isaac",
    "last_name":"Newton",
    "age":"5",
    "dob":"1958-1-3"
}
'''
p = Person.parse_raw(data2)
print(p)
# field names printed

first_name='Isaac' last_name='Newton' age=5 dob=datetime.date(1958, 1, 3)
first_name='Isaac' last_name='Newton' age=5 dob=datetime.date(1958, 1, 3)


In [41]:
# field names printed
print(p.dict())

# However we can instruct the Serializers to use the aliase names
print(p.dict(by_alias=True))
print(p.json(by_alias=True))

{'first_name': 'Isaac', 'last_name': 'Newton', 'age': 5, 'dob': datetime.date(1958, 1, 3)}
{'firstName': 'Isaac', 'lastName': 'Newton', 'age': 5, 'dob': datetime.date(1958, 1, 3)}
{"firstName": "Isaac", "lastName": "Newton", "age": 5, "dob": "1958-01-03"}


# Validation

In [42]:
# smart way of concat dictionaries
data_junk = {**data, "junk":"extra field"}
print(data_junk)

# Pydantic ignores the field which is not mapped
p = Person.parse_obj(data_junk)
print(p)

{'firstName': 'Isaac', 'lastName': 'Newton', 'age': 5, 'dob': '1958-1-3', 'junk': 'extra field'}
first_name='Isaac' last_name='Newton' age=5 dob=datetime.date(1958, 1, 3)


In [45]:
# To get a validation error when extra attributes are detected to be parsed, change the config
from pydantic import Extra

class Person(BaseModel):
    first_name : str = Field(alias='firstName', default=None)
    last_name : str = Field(alias='lastName')
    age : int
    dob : date

    # our custom configuration
    class Config:
        allow_population_by_field_name = True # allows us to work with either field name or alias
        extra = Extra.forbid

try:
    p = Person.parse_obj(data_junk)
    print(p)
except ValidationError as ex:
    print(ex.json())

[
  {
    "loc": [
      "junk"
    ],
    "msg": "extra fields not permitted",
    "type": "value_error.extra"
  }
]


In [60]:
from pydantic import conint, constr

class CustomBaseModel(BaseModel):
    class Config:
        extra = Extra.forbid
        allow_population_by_field_name = True
        

class Test(CustomBaseModel):
    age : conint(gt=0, le=150)
    name : constr(strip_whitespace = True, min_length=2, strict=True, curtail_length=25) 
    # strict = if something is castable to string but is not actually a string, throw an error. value must strictly be string
    # curtail_length = truncate to first 25 characters instead of using max_length and throwing error 


print(Test(age=140, name="    Name "))
print(Test(age=140, name="    N "))

age=140 name='Name'


ValidationError: 1 validation error for Test
name
  ensure this value has at least 2 characters (type=value_error.any_str.min_length; limit_value=2)

In [51]:
from pydantic import validator

class Test(CustomBaseModel):
    hash_tag:str

    @validator('hash_tag') # validator decorator
    def validate_hash_tag(cls, value):
        # the validator will try to parse, and returns the value if no errors are detected
        if not value.startswith('#'):
            raise ValueError("Has tag must start with a #")
        return value.lower()

In [52]:
t = Test(hash_tag="#Test")
print(t.hash_tag)

#test


In [53]:
# changing the value again, wont perform the test
t.hash_tag = "Test"
print(t.hash_tag)

Test


In [54]:
t = Test(hash_tag="Test")
print(t.hash_tag)

ValidationError: 1 validation error for Test
hash_tag
  Has tag must start with a # (type=value_error)

In [55]:
class Test(CustomBaseModel):
    hash_tag:constr(min_length=5, strip_whitespace=True) # these checks are performed before the validator decorator

    @validator("hash_tag")
    def validate_hash_tag(cls, value):
        if not value.startswith('#'):
            return f"#{value.lower()}" # if the hash tag was missed, pre-pend it
        return value.lower()

In [56]:
t = Test(hash_tag="Test")


ValidationError: 1 validation error for Test
hash_tag
  ensure this value has at least 5 characters (type=value_error.any_str.min_length; limit_value=5)

In [57]:
t = Test(hash_tag="Test2")
print(t.dict())

{'hash_tag': '#test2'}


In [61]:
from enum import Enum
from typing import List, Tuple, Union

class PolygonType(Enum):
    traingle = 3
    tetragon = 4
    pentagon = 5
    hexagon = 6
    

class PolygonModel(CustomBaseModel):
    polygon_type: PolygonType # custom defined class
    vertices: List[Tuple[Union[int, float], Union[int, float]]]

    @validator('vertices')
    def validate_vertices(cls, value, values):  # pydantic is specifically looking for value and values. gotta type these 
        polygon_type = values.get('polygon_type')
        if polygon_type:
            num_vertices_required = polygon_type.value
            if len(value) != num_vertices_required:
                raise ValueError(
                    f"For a {polygon_type.name}, exactly {polygon_type.value} ""vertices are required."
                )
        return value

In [62]:
t = PolygonType.traingle
print(t.value)
print(t.name)

3
traingle


In [63]:
PolygonModel(polygon_type=PolygonType.traingle, vertices=[(1,1), (2,2), (3,3)])

PolygonModel(polygon_type=<PolygonType.traingle: 3>, vertices=[(1, 1), (2, 2), (3, 3)])

In [65]:
PolygonModel(polygon_type=PolygonType.traingle, vertices=[(1,1), (2,2+2j), (3,3)])
# vertices -> 1 -> 1 
# implies tuple 1, position 1

ValidationError: 2 validation errors for PolygonModel
vertices -> 1 -> 1
  value is not a valid integer (type=type_error.integer)
vertices -> 1 -> 1
  value is not a valid float (type=type_error.float)

- post
- - byline (one or more authors)
- - - author:
- - - - first_name (required, min 2 chars, max 20 chars)
- - - - last_name (required, min 2 chars, max 20 chars)
- - - - display name (optional, default to first name, intitial od  last name, min 1 char, max 25 char)
- - title (required, at least 10 char, no more than 50, force title case)
- - sub title (optional, if present at least 20 characters, max 100)
- - body (required, at least 100 characters, no upper limit)
- - links (0 or more)
- - - link:
- - - - name (required, min 2 chars, max 25 chars)
- - - - url (required, valid url, that must include scheme (http/https))

In [90]:
from pydantic import AnyHttpUrl
class Link(CustomBaseModel):
    name: constr(min_length=5, max_length=25)
    url: AnyHttpUrl

class Author(CustomBaseModel):
    first_name: constr(min_length=2, max_length=20, strip_whitespace=True)
    last_name: constr(min_length=2, max_length=20, strip_whitespace=True)
    display_name: constr(min_length=1, max_length=25) = None    # constraints + default value
    url: List[Link] = []

    # always = True forces the validator to run, even if display_name is None, this is how we can set a dynamic default value
    @validator("display_name", always=True)
    def validate_display_name(cls, value, values):
        # validator runs, even if previous did not valudate properly - so we need to run our code only if prior fields validated OK
        if not value and 'first_name' in values and 'last_name' in values:
            first_name = values['first_name']
            last_name = values['last_name']
            return f"{first_name} {(last_name[0]).upper()}" # concat a dynamic default value
        return value

In [92]:
Author(first_name="Gail", last_name="Nightingale", url=[Link(name="google", url="https://www.gogle.com")])

Author(first_name='Gail', last_name='Nightingale', display_name='Gail N', url=[Link(name='google', url=AnyHttpUrl('https://www.gogle.com', ))])

In [93]:
Author(first_name="Gail", last_name="Nightingale", display_name="GN")

Author(first_name='Gail', last_name='Nightingale', display_name='GN', url=[])

In [94]:
Author(first_name="X", last_name="Y")

ValidationError: 2 validation errors for Author
first_name
  ensure this value has at least 2 characters (type=value_error.any_str.min_length; limit_value=2)
last_name
  ensure this value has at least 2 characters (type=value_error.any_str.min_length; limit_value=2)

In [95]:
from pydantic import conlist

class Post(CustomBaseModel):
    byline: conlist(item_type=Author, min_items=1) # a list of defined objects
    title: constr(min_length=10, max_length=50, strip_whitespace=True)
    sub_title: constr(min_length=20, max_length=50, strip_whitespace=True) = None
    body: constr(min_length=100)

    @validator('title')
    def validator_title(cls, value):
        return value and value.title() # this is a text trick to convert first character as captial

In [98]:
p = Post(
            byline=[
                        Author(first_name="XX", last_name="YY", 
                                url=[
                                        Link(name="Google", url="http://www.G.com")
                                    ]
                                )
                    ], 
            title="ABCD EFGH IJ", 
            sub_title="r"*40, 
            body="t"*200
        )
print(p.json(indent=2)) 

{
  "byline": [
    {
      "first_name": "XX",
      "last_name": "YY",
      "display_name": "XX Y",
      "url": [
        {
          "name": "Google",
          "url": "http://www.G.com"
        }
      ]
    }
  ],
  "title": "Abcd Efgh Ij",
  "sub_title": "rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr",
  "body": "tttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt"
}


In [100]:
# check. Serialize and Deserialize
pp = Post.parse_raw(p.json(indent=2))

print(pp.json(indent=2))

{
  "byline": [
    {
      "first_name": "XX",
      "last_name": "YY",
      "display_name": "XX Y",
      "url": [
        {
          "name": "Google",
          "url": "http://www.G.com"
        }
      ]
    }
  ],
  "title": "Abcd Efgh Ij",
  "sub_title": "rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr",
  "body": "tttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt"
}


In [106]:
# Add more to a list. This gets actually validated
pp.byline.append(Author(first_name="Cx", last_name="Cy"))

pp.byline

[Author(first_name='XX', last_name='YY', display_name='XX Y', url=[Link(name='Google', url=AnyHttpUrl('http://www.G.com', ))]),
 Author(first_name='Cx', last_name='Cy', display_name='Cx C', url=[]),
 Author(first_name='Cx', last_name='Cy', display_name='Cx C', url=[]),
 Author(first_name='Cx', last_name='Cy', display_name='Cx C', url=[]),
 Author(first_name='Cx', last_name='Cy', display_name='Cx C', url=[]),
 Author(first_name='Cx', last_name='Cy', display_name='Cx C', url=[])]

In [109]:
# print JSON Schema
# Fast API leverages pytdantic cause it can read the JSON Schema
print(Post.schema_json(indent=2))

{
  "title": "Post",
  "type": "object",
  "properties": {
    "byline": {
      "title": "Byline",
      "minItems": 1,
      "type": "array",
      "items": {
        "$ref": "#/definitions/Author"
      }
    },
    "title": {
      "title": "Title",
      "minLength": 10,
      "maxLength": 50,
      "type": "string"
    },
    "sub_title": {
      "title": "Sub Title",
      "minLength": 20,
      "maxLength": 50,
      "type": "string"
    },
    "body": {
      "title": "Body",
      "minLength": 100,
      "type": "string"
    }
  },
  "required": [
    "byline",
    "title",
    "body"
  ],
  "additionalProperties": false,
  "definitions": {
    "Link": {
      "title": "Link",
      "type": "object",
      "properties": {
        "name": {
          "title": "Name",
          "minLength": 5,
          "maxLength": 25,
          "type": "string"
        },
        "url": {
          "title": "Url",
          "minLength": 1,
          "maxLength": 65536,
          "format": "ur

# Dataclass

In [141]:
import xml.etree.ElementTree as ET

class Person(BaseModel):
    name:str
    surname:str
    dob:date = None

    def toXML(self):
        naturalsubject = ET.Element("naturalsubject")
        ET.SubElement(naturalsubject, "name").text = self.name # values['name']
        ET.SubElement(naturalsubject, "surname").text = self.surname
        ET.SubElement(naturalsubject, "dob").text = self.dob
        print(ET.dump(naturalsubject))
        return ET.Element(naturalsubject)

class Entity(BaseModel):
    name:str
    incorpNum:int | None


In [144]:
p = Person(name="TheName", surname="TheSurname")
p.toXML()

e = Entity(name="EntityName")
print(e.json(exclude_unset=True, exclude_none=True))

<naturalsubject><name>TheName</name><surname>TheSurname</surname><dob /></naturalsubject>
None
{"name": "EntityName"}


In [14]:
import uuid

print(str(uuid.uuid4())) # for all objects

print(str(uuid.uuid5(uuid.NAMESPACE_DNS,'803010'))) # track reports and their equivalent guid

8fc96cee-ed19-4686-9e17-1664ff7ef839
14d4875f-3da2-5cae-a825-c4c75992d243
