# Data typing in Python

## From regular class to data class then pydantic


### Regular Class


In [1]:
class Dog:
    """Hand-written class describing a dog.

    Only ``__init__`` is defined here, so ``==`` and ``repr`` keep the
    behaviour inherited from ``object``.
    """

    # Class-level annotations documenting the attribute types.
    name: str
    race: str
    age: int

    def __init__(self, name, race, age):
        # Store the three constructor arguments unchanged on the instance.
        self.name, self.race, self.age = name, race, age


In [2]:
# Build three Dog instances; puppet2 and puppet3 receive identical arguments.
puppet1 = Dog('RUDY', 'Terrier Australiano', 2)
puppet2 = Dog('TEDDY', 'bichón frisé', 3)
puppet3 = Dog('TEDDY', 'bichón frisé', 3)


In [3]:
# The plain class defines no __eq__, so == compares object identity.
puppet2 == puppet3

False

Devuelve False porque son dos objetos distintos: la clase no define `__eq__`, así que `==` compara la identidad de los objetos y no sus valores.

In [4]:
# The plain class defines no __repr__/__str__, so the default object text is printed.
print(puppet2)

<__main__.Dog object at 0x7fc6a0130880>


Devuelve la representación por defecto del objeto (nombre de la clase y dirección en memoria), porque la clase no define `__repr__`.

In [5]:
# The attributes themselves are readable as usual.
print(puppet2.name,puppet2.race,puppet2.age)

TEDDY bichón frisé 3


### Data classes

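Install: not needed on Python 3.7+ (`dataclasses` is part of the standard library); on Python 3.6 use the backport: pip install dataclasses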

In [6]:
from dataclasses import dataclass
@dataclass
class Dog:
    """Dog record declared as a dataclass.

    With the default options, ``@dataclass`` generates ``__init__``,
    ``__repr__`` and ``__eq__`` from the annotated fields below.
    """
    name: str
    race: str
    age: int


In [7]:
# Positional arguments follow the field declaration order: name, race, age.
puppet1 = Dog('RUDY', 'Terrier Australiano', 2)
puppet2 = Dog('TEDDY', 'bichón frisé', 3)
puppet3 = Dog('TEDDY', 'bichón frisé', 3)


In [8]:
# The generated __eq__ compares field values, so two distinct instances
# holding the same values compare equal.
puppet2 == puppet3

True

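Devuelve True porque `@dataclass` genera un `__eq__` que compara los valores de los campos; `puppet2` y `puppet3` siguen siendo dos objetos distintos, pero con los mismos valores.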


In [9]:
# The generated __repr__ lists the class name and every field.
print(puppet2)

Dog(name='TEDDY', race='bichón frisé', age=3)


In [10]:
# Raises TypeError: without order=True no ordering methods are generated.
print(puppet1 > puppet2)

TypeError: '>' not supported between instances of 'Dog' and 'Dog'

#### Added method `__post_init__`


In [11]:
from dataclasses import dataclass, field
@dataclass
class Dog:
    """Dog record that also carries a derived ``sort_index``.

    ``sort_index`` is excluded from the generated ``__init__`` and is
    filled in from ``age`` by ``__post_init__``.
    """

    sort_index: int = field(init=False)
    name: str
    race: str
    age: int

    def __post_init__(self):
        # Called by the generated __init__; mirror age into sort_index.
        self.sort_index = self.age

    def __repr__(self):
        # Custom text form: "<sort_index> <name> <race> (<age>)".
        return f"{self.sort_index} {self.name} {self.race} ({self.age})"


In [12]:
# sort_index is declared with init=False, so only name, race and age are passed.
puppet1 = Dog('RUDY', 'Terrier Australiano', 2)
puppet2 = Dog('TEDDY', 'bichón frisé', 3)


In [13]:
# Uses the hand-written __repr__ of this Dog version.
print(puppet2)

3 TEDDY bichón frisé (3)


In [14]:
# Still raises TypeError: adding sort_index alone does not enable ordering;
# the decorator was applied without order=True.
print(puppet1 > puppet2)


TypeError: '>' not supported between instances of 'Dog' and 'Dog'

Sigue devolviendo error: sin `order=True`, `@dataclass` no genera los métodos de comparación (`<`, `<=`, `>`, `>=`).

#### Read only dataclasses


In [15]:
@dataclass(frozen=True, order=True)
class Dog:
    """Immutable, orderable dog record.

    ``order=True`` generates the comparison methods and ``frozen=True``
    makes the fields read-only. ``sort_index`` is declared first, is not
    an ``__init__`` argument, and is copied from ``age``.
    """

    sort_index: int = field(init=False)
    name: str
    race: str
    age: int

    def __post_init__(self):
        # Plain assignment is blocked on a frozen instance, so the derived
        # field is set through object.__setattr__.
        object.__setattr__(self, "sort_index", self.age)


In [16]:
# Same constructor call as before; sort_index is filled in by __post_init__.
puppet1 = Dog('RUDY', 'Terrier Australiano', 2)
puppet2 = Dog('TEDDY', 'bichón frisé', 3)
puppet3 = Dog('TEDDY', 'bichón frisé', 3)


In [17]:
# The generated __repr__ includes sort_index along with the other fields.
print(puppet1)

Dog(sort_index=2, name='RUDY', race='Terrier Australiano', age=2)


In [18]:
# Fields are compared in declaration order, so sort_index (2 vs 3) decides.
print(puppet1 < puppet2 )

True


In [19]:
# The opposite comparison is supported too, now that order=True is set.
print(puppet1 > puppet2 )

False


In [20]:
# Raises FrozenInstanceError: frozen=True makes the fields read-only.
puppet2.age = 4

FrozenInstanceError: cannot assign to field 'age'

Ahora no se puede modificar el objeto

### Annotation variables


In [21]:
from typing import List

# Variable annotations use the form ``name: type = value``.
nombre: str = 'Pedro'
edad: int = 24
altura_mts: float = 1.7
# List[str] annotates a list whose items are strings.
colegas: List[str] = ['Jane', 'John']


### Special types


* Any: A variable of type Any is compatible with every type.
* Literal: Indicates that a value must be exactly one of the listed literal values.
* Union: Indicates that a value may be any one of several types, e.g. Union[int, float].
* TypedDict: Defines a dictionary type with a fixed set of string keys, each with its own value type.
* NoReturn: Marks a function that never returns normally (it always raises an exception or never terminates). It is not the equivalent of void in other languages: for a function that simply returns nothing, annotate the return type as None.
* Final: Marks a name that must not be reassigned or overridden after it is defined.


#### Special types Any

In [22]:
from typing import Any

# A name annotated with Any can be rebound to a value of any type.
result: Any = "Done"
result = 10
status: str = "Pending"
# An Any value can also be assigned to a str-annotated name; after this
# line status holds the int 10 at runtime.
status = result


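No se genera ningún error: una variable anotada como `Any` acepta primero una cadena y luego un entero, y su valor también puede asignarse a una variable anotada como `str`.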

#### Special types - Literal


In [23]:
from typing import Literal, Dict
# The only gender labels accepted by static type checkers.
GENDER = Literal["male", "female", "No specified"]


def create_user(
    first_name: str,
    last_name: str,
    gender: GENDER,
) -> Dict[str, str]:
    """Return a user record as a dict.

    Args:
        first_name: The user's given name.
        last_name: The user's family name.
        gender: One of the ``GENDER`` literals. The value is not checked
            at runtime; it is stored as given.

    Returns:
        A dict with the keys ``first_name``, ``last_name`` and ``gender``.
    """
    return dict(first_name=first_name, last_name=last_name, gender=gender)


In [24]:
# "male" is one of the GENDER literals.
create_user("John", "Doe", "male")

{'first_name': 'John', 'last_name': 'Doe', 'gender': 'male'}

In [25]:
# "female" is one of the GENDER literals.
create_user("Jame", "Doe", "female")

{'first_name': 'Jame', 'last_name': 'Doe', 'gender': 'female'}

In [26]:
# "X" is not a GENDER literal; the call still runs because the annotation
# is not enforced at runtime.
create_user("X", "X", "X")

{'first_name': 'X', 'last_name': 'X', 'gender': 'X'}

No devuelve error porque Python no comprueba las anotaciones en tiempo de ejecución; quienes sí detectan el valor inválido son los validadores estáticos de código, como mypy.

#### Special types Union


In [27]:
from typing import Union

def get_temperature() -> Union[int, float]:
    """Return a fixed sample temperature; the annotation allows int or float."""
    reading = 20.8  # works with 20 too
    return reading


#### Special types TypedDict

In [28]:
from typing import TypedDict, Union
class Card(TypedDict):
    """Dictionary type for a playing card with ``rank`` and ``suit`` keys."""
    # rank may be a string or an int.
    rank: Union[str, int]
    suit: str


In [29]:
# Card can be used to annotate a variable
ace_of_spade: Card = {'rank': 'A', 'suit': '♤'}
# or can be instantiated
ace_of_spade = Card(rank='A', suit='♤')


In [30]:
# Prints as an ordinary dict.
print(ace_of_spade)


{'rank': 'A', 'suit': '♤'}


In [31]:
# rank is annotated Union[str, int], so an int rank is allowed as well.
ace_of_spade2 = Card(rank=2, suit='♤')
print(ace_of_spade2)



{'rank': 2, 'suit': '♤'}


#### Special types NoReturn


In [32]:
from typing import NoReturn

def hi1() -> None:
    """Print a greeting; nothing is returned, hence the ``None`` annotation."""
    print("Hello world!")

def hi2() -> None:
    """Print a greeting and return normally.

    The return annotation is ``None`` because the function finishes and
    implicitly returns ``None``. ``NoReturn`` would be wrong here: it is
    reserved for functions that never return, and mypy reports an implicit
    return in a function annotated ``NoReturn`` as an error.
    """
    print("Hello world!")


def fail(message: str) -> NoReturn:
    """Always raise ``RuntimeError`` with the given message.

    This is a correct use of ``NoReturn``: control never comes back to the
    caller through a normal return.

    Args:
        message: Text passed to the raised ``RuntimeError``.

    Raises:
        RuntimeError: Always.
    """
    raise RuntimeError(message)


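Para una función que no devuelve ningún valor, la anotación correcta es `None`. `NoReturn` se reserva para funciones que nunca terminan normalmente (siempre lanzan una excepción o no finalizan); mypy marca como error una función anotada con `NoReturn` que llega a retornar.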

#### Special type Final


In [33]:
from typing import Final
# Final marks the name as a constant for static type checkers.
MIN_NAME_LONG: Final = 2


In [34]:
# mypy reports an error when a new value is assigned to a Final name
# (the statement itself still runs).
MIN_NAME_LONG += 1


In [35]:
class Validator:
    """Base validator exposing a constant declared as ``Final``."""

    # Final[int]: static type checkers reject reassigning or overriding it.
    MIN_NAME_LONG: Final[int] = 4


In [36]:
class UserValidator(Validator):
    # mypy flags the next line: MIN_NAME_LONG is declared Final in
    # Validator, so a subclass must not override it.
    MIN_NAME_LONG = 3


#### Types validators mypy

Install: pip install mypy 

Se crea un archivo pruebas_typing.py

Se ejecuta mypy sobre ese archivo: `mypy pruebas_typing.py`

In [54]:
!pip install pydantic mypy email_validator numpy

Collecting numpy
  Downloading numpy-1.22.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[K     |████████████████████████████████| 16.8 MB 512 kB/s eta 0:00:01
Installing collected packages: numpy
Successfully installed numpy-1.22.2


### Pydantic

Data validation and settings management using python type annotations.



In [38]:
from pydantic import BaseModel
from typing import Optional 
from random import  randrange 

class Product(BaseModel):
    """Pydantic model describing a product."""
    title: str
    description: str
    price: int
    # Optional fields: used with these defaults when the caller omits them.
    published: Optional[bool]= True
    id_obj: Optional[int]= None


#### Pydantic Field Types

* None, type(None) or Literal[None]: Allows only None value
* bool: Boolean value
* int: Integer value
* float: Float value
* str: String value
* bytes: bytes are accepted as-is; bytearray is converted using bytes(v)
* list: allows list, tuple, set, frozenset, deque, or generators and casts to a list
* tuple: allows list, tuple, set, frozenset, deque, or generators and casts to a tuple
* Dict: dict(v) is used to attempt to convert a dictionary
* Set: allows list, tuple, set, frozenset, deque, or generators and casts to a set
* Frozenset:allows list, tuple, set, frozenset, deque, or generators and casts to a frozen set
* Deque: allows list, tuple, set, frozenset, deque, or generators and casts to a deque
* datetime.date: allow date from datetime.
* datetime.time: allow time from datetime
* datetime.datetime: allow datetime from datetime
* typing.Any: allows any value including None, thus an Any field is optional
* ipaddress.IPv4Address: simply uses the type itself for validation by passing the value to IPv4Address(v)
* enum.Enum: checks that the value is a valid Enum instance
* decimal.Decimal: pydantic attempts to convert the value to a string, then passes the string to Decimal(v)
* pathlib.Path:simply uses the type itself for validation by passing the value to Path(v)
* uuid.UUID: strings and bytes (converted to strings) are passed to UUID(v)
* EmailStr:requires email-validator to be installed; the input string must be a valid email address,
* Json: a special type wrapper which loads JSON before parsing
* PaymentCardNumber:for parsing and validating payment cards
* AnyUrl: any URL
* PostgresDsn:a postgres DSN style URL


In [39]:
from datetime import datetime
from enum import Enum
import re
from pydantic import BaseSettings, Field, EmailStr, validator
from typing import Optional

class Sexo(str, Enum):
    """Sex options; mixing in ``str`` makes every member a string as well."""
    masculino = "Masculino"
    femenino = "Femenino"
    indefinido = "Indefinido"


In [40]:
# NOTE(review): pydantic's BaseSettings can also populate fields from
# environment variables; if this is plain document data, BaseModel may be
# the intended base class. Confirm before changing.
class Documentos(BaseSettings):
    """File names and file contents of the attached documents.

    Every field is an optional string defaulting to the empty string.
    """
    IdentificacionFileName: Optional[str] = ''
    IdentificacionFile: Optional[str] = ''
    CedulaFileName: Optional[str] = ''
    CedulaFile: Optional[str] = ''


In [43]:
class Paciente(BaseSettings):
    """Patient record validated by pydantic.

    ``NumeroCelular`` gets an extra check: it must consist of digits only
    and contain at least 10 of them.
    """

    Nombre: str
    ApellidoPaterno: str
    ApellidoMaterno: str
    FechaNacimiento: datetime
    # The field shares its name with the Sexo enum it is annotated with.
    Sexo: Optional[Sexo]
    NumeroCelular: str  # digits only, at least 10 of them
    CorreoElectronico: EmailStr

    @validator("NumeroCelular")
    def phone_number_must_have_10_digits(cls, v):
        """Validate the phone number and return it unchanged.

        Args:
            v: The raw ``NumeroCelular`` value.

        Returns:
            ``v`` when it is made up of at least 10 digits and nothing else.

        Raises:
            ValueError: If ``v`` is shorter than 10 characters or contains
                any non-digit character.
        """
        # fullmatch anchors the pattern at both ends. The previous
        # re.match only checked the first 10 characters, so values such as
        # "1234567890abc" were accepted.
        if re.fullmatch(r"\d{10,}", v) is None:
            raise ValueError("Phone number must have at least 10 digits")
        return v


### Introduction to orjson

orjson, like other alternatives to the built-in `json` package in Python's standard library, provides extra tooling for object serialization and significantly faster reads and writes, which makes it a good fit for heavier workloads.
Its features and drawbacks compared to other Python JSON libraries:
* serializes dataclass instances 40-50x as fast as other libraries
* serializes datetime, date, and time instances to RFC 3339 format, e.g., "1970-01-01T00:00:00+00:00"
* serializes numpy.ndarray instances 4-12x as fast with 0.3x the memory usage of other libraries
* pretty prints 10x to 20x as fast as the standard library
* serializes to bytes rather than str, i.e., is not a drop-in replacement
* serializes str without escaping unicode to ASCII, e.g., "好" rather than "\\u597d"
* serializes float 10x as fast and deserializes twice as fast as other libraries
* serializes subclasses of str, int, list, and dict natively, requiring default to specify how to serialize others
* serializes arbitrary types using a default hook
* has strict UTF-8 conformance, more correct than the standard library
* has strict JSON conformance in not supporting NaN/Infinity/-Infinity
* has an option for strict JSON conformance on 53-bit integers with default support for 64-bit
* does not provide load() or dump() functions for reading from/writing to file-like objects



Install: pip install orjson

In [44]:
!pip install orjson



In [45]:
import orjson
from datetime import datetime
import uuid
# Sample payload mixing emoji, numbers, a boolean, containers, non-Latin
# text, a datetime and a UUID.
dato = {
    "emoji_lagrimas": "😂",
    "emoji_reloj": "⏰",
    "entero": 123,
    "flotante": 10.4,
    "boleano": False,
    "lista": ["element1", "element2"],
    "diccionario": {"key1": "value1", "key2": "value2"},
    "ruso": "Привет",
    "chino": "您好",
    "japones": "こんにちは",
    "datetime": datetime.now(),
    "uuid": uuid.uuid1(),
}


In [48]:
# orjson.dumps returns bytes rather than str; non-ASCII text is emitted
# as UTF-8 bytes.
json_byte = orjson.dumps(dato)
print(json_byte)


b'{"emoji_lagrimas":"\xf0\x9f\x98\x82","emoji_reloj":"\xe2\x8f\xb0","entero":123,"flotante":10.4,"boleano":false,"lista":["element1","element2"],"diccionario":{"key1":"value1","key2":"value2"},"ruso":"\xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82","chino":"\xe6\x82\xa8\xe5\xa5\xbd","japones":"\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf","datetime":"2022-02-10T16:57:55.564860","uuid":"1ec25bb4-8ab4-11ec-914e-555cd90dffee"}'


#### Convert JSON bytes back to Python objects


In [49]:
# Parse the JSON bytes back into Python objects (here, a dict).
json_data = orjson.loads(json_byte)
print(json_data)
# NOTE(review): the dict literal below is a bare expression that does not
# use json_data, and its keys differ from those of `dato`. It looks like
# pasted sample output; confirm and remove.
{'emoji_tears': '😂',
 'emoji_clock': '⏰',
 'integer': 123,
 'float': 10.4,
 'boolean': False,
 'list': ['element1', 'element2'],
 'dict': {'key1': 'value1', 'key2': 'value2'},
 'russian': 'Привет',
 'chinese': '您好',
 'japanese': 'こんにちは',
 'datetime': '2022-01-20T19:25:28.555227',
 'uuid': '40d49edc-7a48-11ec-865d-9078414427c1'}


{'emoji_lagrimas': '😂', 'emoji_reloj': '⏰', 'entero': 123, 'flotante': 10.4, 'boleano': False, 'lista': ['element1', 'element2'], 'diccionario': {'key1': 'value1', 'key2': 'value2'}, 'ruso': 'Привет', 'chino': '您好', 'japones': 'こんにちは', 'datetime': '2022-02-10T16:57:55.564860', 'uuid': '1ec25bb4-8ab4-11ec-914e-555cd90dffee'}


{'emoji_tears': '😂',
 'emoji_clock': '⏰',
 'integer': 123,
 'float': 10.4,
 'boolean': False,
 'list': ['element1', 'element2'],
 'dict': {'key1': 'value1', 'key2': 'value2'},
 'russian': 'Привет',
 'chinese': '您好',
 'japanese': 'こんにちは',
 'datetime': '2022-01-20T19:25:28.555227',
 'uuid': '40d49edc-7a48-11ec-865d-9078414427c1'}

#### Decimal data type


In [50]:
import decimal
# Raises TypeError: decimal.Decimal is not serialized without a default hook.
orjson.dumps(decimal.Decimal("3.141592653"))



TypeError: Type is not JSON serializable: decimal.Decimal

Solución:

In [51]:
import decimal
import math 
def default(obj):
    """Fallback serializer meant to be passed as ``default=`` to ``orjson.dumps``.

    Args:
        obj: The object to convert.

    Returns:
        The string form of ``obj`` when it is a ``decimal.Decimal``.

    Raises:
        TypeError: For any other type.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError
    return str(obj)


In [52]:
# The default hook turns the Decimal into a string, so it is emitted as a
# JSON string.
orjson.dumps(decimal.Decimal(f"{math.pi}"), default=default)


b'"3.141592653589793"'

#### Option


In [55]:
import orjson
import datetime
import numpy as np

# Payload combining a standard-library datetime with a 2x2 NumPy array.
data = {
    "datetime": datetime.datetime.now(),
    "numpy": np.array([[1, 2], [3, 4]]),
}


In [56]:
# Options are combined with "|". OPT_NAIVE_UTC serializes the naive
# datetime as UTC (note the "+00:00" suffix in the output);
# OPT_SERIALIZE_NUMPY enables serializing the numpy array.
json_byte = orjson.dumps(data, option=orjson.OPT_NAIVE_UTC | orjson.OPT_SERIALIZE_NUMPY)
print(json_byte)
# Parsing back yields plain Python types: a str and nested lists.
print(orjson.loads(json_byte))


b'{"datetime":"2022-02-10T16:59:37.412149+00:00","numpy":[[1,2],[3,4]]}'
{'datetime': '2022-02-10T16:59:37.412149+00:00', 'numpy': [[1, 2], [3, 4]]}


#### Dataclass

In [57]:
import dataclasses, orjson, typing
@dataclasses.dataclass
class Person:
    """A person with an id, a name and a status flag."""

    id: int
    name: str
    # Optional constructor argument; True when omitted.
    status: bool = True

@dataclasses.dataclass
class Class:
    """A class group: an id, a name and the list of its students."""
    id: int
    name: str
    # Each student is a Person instance.
    students: typing.List[Person]




In [58]:
# The second Person omits status, so its default (True) is used.
data = Class(1, "Class A", [Person(1, "John Doe", False), Person(2, "Mary Sue")])
# Dataclass instances, including the nested ones, are serialized as JSON objects.
json_byte = orjson.dumps(data)
print(json_byte)
# Parsing back yields plain dicts and lists, not dataclass instances.
print(orjson.loads(json_byte))


b'{"id":1,"name":"Class A","students":[{"id":1,"name":"John Doe","status":false},{"id":2,"name":"Mary Sue","status":true}]}'
{'id': 1, 'name': 'Class A', 'students': [{'id': 1, 'name': 'John Doe', 'status': False}, {'id': 2, 'name': 'Mary Sue', 'status': True}]}


#### Read/Write files

## References:

* https://www.seraph.to/python_typing.html#python_typing
* https://justgiveacar.medium.com/data-classes-in-python-991a3f68ddf9
* https://towardsdatascience.com/9-reasons-why-you-should-start-using-python-dataclasses-98271adadc66
* https://www.seraph.to/python_dataclass.html#python_dataclass
* https://medium.com/codex/getting-started-with-pydantic-as-a-data-validation-tool-in-api-development-2155deef37c4
* https://medium.com/short-bits/pydantic-better-data-validation-for-python-f4d2c07a7c
* https://pydantic-docs.helpmanual.io/
* https://pythonspeed.com/articles/faster-json-library/
* https://github.com/ijl/orjson
* https://pydantic-docs.helpmanual.io/usage/types/
