# Data Class Builders

## Overview of Data Class Builders

In [2]:
from coordinates import Coordinate

moscow = Coordinate(55.76, 37.62)
moscow  #1

<coordinates.Coordinate at 0x7f2c3764f970>

In [3]:
location = Coordinate(55.76, 37.62)
location == moscow  #2

False

In [4]:
(location.lat, location.lon) == (moscow.lat, moscow.lon)  #3

True

#### collections.namedtuple

In [5]:
from collections import namedtuple

Coordinate = namedtuple('Coordinate', 'lat lon')
print(issubclass(Coordinate, tuple))

moscow = Coordinate(55.756, 37.617)
print(moscow)  #1

moscow == Coordinate(lat=55.757, lon=37.617)  #2

True
Coordinate(lat=55.756, lon=37.617)


False

#### typing.NamedTuple

In [6]:
import typing

Coordinate = typing.NamedTuple('Coordinate', [('lat', float), ('lon', float)])
# alternative construction
# Coordinate = typing.NamedTuple('Coordinate', lat=float, lon=float)
print(issubclass, tuple)

typing.get_type_hints(Coordinate)

<built-in function issubclass> <class 'tuple'>


{'lat': float, 'lon': float}

In [13]:
# another alternative: class statement
from typing import NamedTuple

class Coordinate(NamedTuple):
    lat: float
    lon: float

    def __str__(self):
        ns = 'N' if self.lat >= 0 else 'S'
        we = 'E' if self.lon >= 0 else 'W'
        return f'{abs(self.lat):.1f}°{ns}, {abs(self.lon):.1f}°{we}'

moscow = Coordinate(lat=55.757, lon=37.617)
print(moscow)

# Although NamedTuple appears in the class statement as a superclass, it's actually not.
# typing.NamedTuple uses the advanced functionality of a metaclass to customize the creation
# of the user's class
# print(issubclass(Coordinate, NamedTuple))
print(issubclass(Coordinate, tuple))

55.8°N, 37.6°E


TypeError: issubclass() arg 2 must be a class, a tuple of classes, or a union

#### dataclasses.dataclass

In [14]:
from dataclasses import dataclass

@dataclass(frozen=True)
class Coordinate:
    lat: float
    lon: float

    def __str__(self):
        ns = 'N' if self.lat >= 0 else 'S'
        we = 'E' if self.lon >= 0 else 'W'
        return f'{abs(self.lat):.1f}°{ns}, {abs(self.lon):.1f}°{we}'

## Classic Named Tuples

In [1]:
from collections import namedtuple

City = namedtuple('City', 'name country population coordinates')  #1
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))  #2
tokyo

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [2]:
tokyo.population  #3

36.933

In [3]:
tokyo.coordinates

(35.689722, 139.691667)

In [4]:
tokyo[1]

'JP'

In [5]:
City._fields  #1

('name', 'country', 'population', 'coordinates')

In [8]:
Coordinate = namedtuple('Coordinate', 'lat lon')
delhi_data = ('Delhi NCR', 'IN', 21.935, Coordinate(28.613889, 77.208889))
delhi = City._make(delhi_data)  #2
delhi._asdict()  #3

{'name': 'Delhi NCR',
 'country': 'IN',
 'population': 21.935,
 'coordinates': Coordinate(lat=28.613889, lon=77.208889)}

In [9]:
import json
json.dumps(delhi._asdict())

'{"name": "Delhi NCR", "country": "IN", "population": 21.935, "coordinates": [28.613889, 77.208889]}'

In [10]:
# nametuple default values
Coordinate = namedtuple('Coordinate', 'lat lon reference', defaults=['WGS84'])
Coordinate(0, 0)

Coordinate(lat=0, lon=0, reference='WGS84')

In [11]:
Coordinate._field_defaults

{'reference': 'WGS84'}

#### Hacking a namedtuple to inject a method

In [13]:
from collections import namedtuple

Card = namedtuple('Card', ['rank', 'suit'])
Card.suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)  #1

class FrenchDeck:
    ranks = [str(n) for n in range(2, 11)] + list("JQKA")
    suits = "spades diamonds clubs hearts".split()

    def __init__(self):
        self._cards = [Card(rank, suit) for suit in self.suits for rank in self.ranks]
    
    def __len__(self):
        return len(self._cards)
    
    # benefits of implementing this
    def __getitem__(self, position):
        return self._cards[position]

def spades_high(card):  #2
    rank_value = FrenchDeck.ranks.index(card.rank)
    suit_value = card.suit_values[card.suit]
    return rank_value * len(card.suit_values) + suit_value

Card.overall_rank = spades_high  #3
lowest_card = Card('2', 'clubs')
highest_card = Card('A', 'spades')

lowest_card.overall_rank()  #4

0

In [14]:
highest_card.overall_rank()

51

## Typed Named Tuples

In [4]:
# Coordinate class with a default field using typing.NamedTuple
from typing import NamedTuple

class Coordinate(NamedTuple):
    lat: float  #1
    lon: float
    reference: str = 'WGS84'  #2

c = Coordinate(28.613889, 77.208889)
print(c.reference)
print(c[1])
print(c._field_defaults)
print(c._fields)

WGS84
77.208889
{'reference': 'WGS84'}
('lat', 'lon', 'reference')


## Type Hints 101

### No Runtime Effect

In [5]:
import typing

class Coordinate(typing.NamedTuple):
    lat: float
    lon: float

trash = Coordinate('Ni!', None)
print(trash)  #1

Coordinate(lat='Ni!', lon=None)


### Variable Annotation Syntax

In [None]:
# basic syntax
# var_name: some_type
name: str
name = 'Johannes'
# initialize the variable with a value (in NameTuple or dataclass, that value will become the
# default value for that attribute)
last_name: str = 'Ferreira'

### The Meaning of Variable Annotations

In [8]:
class DemoPlainClass:
    a: int  #1
    b: float = 1.1  #2
    c = 'spam'  #3

print(DemoPlainClass.__annotations__)
DemoPlainClass.a

{'a': <class 'int'>, 'b': <class 'float'>}


AttributeError: type object 'DemoPlainClass' has no attribute 'a'

In [9]:
DemoPlainClass.b

1.1

In [10]:
DemoPlainClass.c

'spam'

### Inspecting a typing.NamedTuple

In [11]:
import typing

class DemoNTClass(typing.NamedTuple):
    a: int  #1
    b: float = 1.1  #2
    c = 'spam'  #3

DemoNTClass.__annotations__

{'a': int, 'b': float}

In [12]:
DemoNTClass.a

_tuplegetter(0, 'Alias for field number 0')

In [13]:
DemoNTClass.b

_tuplegetter(1, 'Alias for field number 1')

In [14]:
DemoNTClass.c

'spam'

In [15]:
DemoNTClass.__doc__

'DemoNTClass(a, b)'

In [16]:
nt = DemoNTClass(8)
nt.a

8

In [17]:
nt.b

1.1

In [18]:
nt.c

'spam'

In [19]:
nt.a = 9

AttributeError: can't set attribute

In [20]:
nt.b = 2.2

AttributeError: can't set attribute

In [21]:
nt.c = 'email'

AttributeError: 'DemoNTClass' object attribute 'c' is read-only

### Inspecting a class decorated wiht dataclass

In [23]:
from dataclasses import dataclass

@dataclass
class DemoDataClass:
    a: int  #1
    b: float = 1.1  #2
    c = 'spam'  #3

In [24]:
DemoDataClass.__annotations__

{'a': int, 'b': float}

In [25]:
DemoDataClass.__doc__

'DemoDataClass(a: int, b: float = 1.1)'

In [26]:
DemoDataClass.a

AttributeError: type object 'DemoDataClass' has no attribute 'a'

In [27]:
DemoDataClass.b

1.1

In [28]:
DemoDataClass.c

'spam'

In [29]:
dc = DemoDataClass(9)
dc.a

9

In [30]:
dc.b

1.1

In [31]:
dc.c

'spam'

In [33]:
dc.a = 10
dc.b = 'oops'
dc.c = 'whatever'
dc.z = 'secret stash'

In [35]:
DemoDataClass.c

'spam'

In [36]:
id(DemoDataClass.c), id(dc.c)

(140577955335344, 140577955812336)

## More About @dataclass

### Field Options

In [None]:
from dataclasses import dataclass

@dataclass
class ClubMember:
    guest: list = None
    name: str  # TypeError: non-default argument 'name' follows default argument

ClubMember()

In [None]:
from dataclasses import dataclass

@dataclass
class ClubMember:
    name: str
    guest: list = []  # ValueError: mutable default  for field guest is not allowed: use default_factory

ClubMember()

In [2]:
# Suggested solution for mutable default values
from dataclasses import dataclass, field

@dataclass
class ClubMember:
    name: str
    guests: list[str] = field(default_factory=list)  # list[str] means "a list of str"

ClubMember("Jhon")

ClubMember(name='Jhon', guests=[])

In [1]:
@dataclass
class ClubMember:
    name: str
    guests: list = field(default_factory=list)
    athlete: bool = field(default=False, repr=False)

c = ClubMember("Jhon")
c

NameError: name 'dataclass' is not defined

In [10]:
c.athlete

False

### Post-init Processing

In [3]:
"""
``HackerClubMember`` objects accept an optional 'handle' argument::
    >>> anna = HackerClubMember('Anna Ravenscroft', handle='AnnaRaven')
    >>> anna
    HackerClubMember(name='Anna Ravenscroft', guests=[], handle='AnnaRaven')
If 'handle' is omitted, it's set to the first part of the member's name::
    >>> leo = HackerClubMember('Leo Rochael')
    >>> leo
    HackerClubMember(name'Leo Rochael', guests=[], handle='Leo')
Members must have a unique handle. The following ''leo2'' will not be created,
because its ''handle'' wold be 'Leo', wichi was taken by ''leo''::
    >>> leo2 = HackerClubMember('Leo DaVinci')
    Traceback (most recent call last):
    ...
    ValueError: handle 'Leo' already exists.
To fix, ''leo2'' must be created with an explicit ''handle''::
    >>> leo2 = HackerClubMember('Leo DaVinci', handle='Neo')
"""
from dataclasses import dataclass, field

@dataclass
class ClubMember:
    name: str
    guests: list[str] = field(default_factory=list)


@dataclass
class HackerClubMember(ClubMember):  #1
    all_handles = set()  #2 (class variable becuse no typing was provided)
    handle: str = ''  #3

    def __post_init__(self):
        cls = self.__class__  #4
        
        if self.handle == '':  #5
            self.handle = self.name.split()[0]
        
        if self.handle in cls.all_handles:  #6
            msg = f"handle {self.handle!r} already exists."
            raise ValueError(msg)
        
        cls.all_handles.add(self.handle)  #7

In [4]:
anna = HackerClubMember('Anna Ravenscroft', handle='AnnaRaven')
anna

HackerClubMember(name='Anna Ravenscroft', guests=[], handle='AnnaRaven')

In [5]:
leo = HackerClubMember('Leo Rochael')
print(leo)
leo2 = HackerClubMember('Leo DaVinci')

HackerClubMember(name='Leo Rochael', guests=[], handle='Leo')


ValueError: handle 'Leo' already exists.

### Initialization Variables That Are Not Fields

In [7]:
from dataclasses import dataclass, InitVar, fields

class DatabaseType:
    def lookup(self, j):
        print(j)


@dataclass
class C:
    i: int
    j: int = None
    database: InitVar[DatabaseType] = None

    def __post_init__(self, database):
        if self.j is None and database is not None:
            self.j = database.lookup('j')

c = C(10, database=DatabaseType())
print(c.database)

j
None


### @dataclass Example: Dublin Core Resource Record

In [11]:
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum, auto
from datetime import date

class ResourceType(Enum):  #1
    BOOK = auto()
    EBOOK = auto()
    VIDEO = auto()

@dataclass
class Resource:
    """Media resource description."""
    identifier: str  #2
    title: str = '<untitled>'  #3
    creators: list[str] = field(default_factory=list)
    date: Optional[date] = None  #4
    type: ResourceType = ResourceType.BOOK  #5
    description: str = ''
    language: str = ''
    subjects: list[str] = field(default_factory=list)

    def __repr__(self):
        cls = self.__class__
        cls_name = cls.__name__
        indent = ' ' * 4
        res = [f'{cls_name}(']
        for f in fields(cls):
            value = getattr(self, f.name)
            res.append(f'{indent}{f.name} = {value!r},')
        
        res.append(')')
        return '\n'.join(res)

In [13]:
description = 'Improving the design of existing code'
book = Resource('978-0-13-475759-9', 'Refactoring 2nd Edition',
                ['Martin Fowler', 'Kent Beck'], date(2018, 11, 19), ResourceType.BOOK,
                description, 'EN', ['computer programming', 'OOP'])
book  # doctest: +NORMALIZE_WHITESPACE

Resource(
    identifier = '978-0-13-475759-9',
    title = 'Refactoring 2nd Edition',
    creators = ['Martin Fowler', 'Kent Beck'],
    date = datetime.date(2018, 11, 19),
    type = <ResourceType.BOOK: 1>,
    description = 'Improving the design of existing code',
    language = 'EN',
    subjects = ['computer programming', 'OOP'],
)

## Pattern Matching Class Instances

### Simple Class Patterns

### Keyword Class Patterns

In [5]:
import typing

class City(typing.NamedTuple):
    continent: str
    name: str
    country: str

cities = [
    City('Asia', 'Tokyo', 'JP'),
    City('Asia', 'Delhi', 'IN'),
    City('North America', 'Mexico City', 'MX'),
    City('North America', 'New York', 'US'),
    City('South America', 'São Paulo', 'BR'),
]

def match_asian_cities():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia'):
                results.append(city)
    
    return results

match_asian_cities()

[City(continent='Asia', name='Tokyo', country='JP'),
 City(continent='Asia', name='Delhi', country='IN')]

In [6]:
def match_asina_countries():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia', country=cc):  # country values is collected into cc variable
                results.append(cc)  # and appended to the results
    
    return results

match_asina_countries()

['JP', 'IN']

### Positional Class Patterns

In [7]:
# The following function would return a list of Asian cities, using a positional class pattern
def match_asian_cities_pos1():
    results = []
    for city in cities:
        match city:
            case City('Asia'):
                results.append(city)
    
    return results

match_asian_cities_pos1()

[City(continent='Asia', name='Tokyo', country='JP'),
 City(continent='Asia', name='Delhi', country='IN')]

In [9]:
# If you want to collect the value of the country attribute, you could write:
def match_asian_cities_pos2():
    results = []
    for city in cities:
        match city:
            case City('Asia', _, country):
                results.append(country)
    
    return results

match_asian_cities_pos2()

['JP', 'IN']

In [10]:
City.__match_args__

('continent', 'name', 'country')