In [1]:
from dataclasses import (
    dataclass,
    field,
)

# field

In [3]:
@dataclass
class C:
    """Three fields showing the different ways to declare a default."""

    a: int                                             # required: no default
    b: int = 0                                         # optional: plain default of 0
    my_list: list[int] = field(default_factory=list)   # default built by calling list()

c = C(a=10)
print(c.my_list)   # starts out as an empty list

c.my_list.extend([1, 2, 3])   # grow the existing list in place
print(c.my_list)

c.my_list = [4, 5, 6]   # rebind the attribute to a different list
print(c.my_list)

c.my_list = "Not a list"   # the list[int] annotation is not enforced at runtime
print(c.my_list)

[]
[1, 2, 3]
[4, 5, 6]
Not a list


In [None]:
from datetime import datetime

def sample_factory():
    """Return the current local time as an ISO 8601 string."""
    current = datetime.now()
    return current.isoformat()

@dataclass
class Foo:
    """Dataclass whose ``b`` default is produced by a factory call."""

    a: int = 1
    # default_factory is called for each new instance, so every Foo gets the
    # timestamp of its own creation rather than one value shared by all
    b: str = field(default_factory=sample_factory)

foo, bar = Foo(), Foo()

print(foo.b)
print(bar.b)

2024-11-16T23:43:43.236885
2024-11-16T23:43:43.237928


In [None]:
@dataclass
class C:
    """Shows how ``repr=False`` and defaults affect a dataclass."""

    x: int
    y: int = field(repr=False)               # left out of repr(); no default
    z: int = field(repr=False, default=10)   # left out of repr(); default 10
    t: int = 20

# Fields that have a default are also readable on the class itself.
print(f"{C.z}, {C.t}")
# print(C.x, C.y)   # AttributeError: x and y have no default, so the class has no such attributes

c = C(x=1, y=2)
print(c)   # y and z are missing from the output because of repr=False

10, 20
C(x=1, t=20)


# Field

- Field objects dùng để mô tả từng trường (field) được định nghĩa trong dataclass.
=> Nói cách khác, mỗi field được định nghĩa tương ứng với một Field object
- Object này được tạo internally và được trả về bởi fields() method => không nên khởi tạo Field một cách trực tiếp.

# fields

In [13]:
# fields() returns a tuple holding one Field object per field defined in the dataclass
from dataclasses import fields

@dataclass
class Spam:
    """Small dataclass with three defaulted fields."""

    a: int = 1
    b: int = 2
    c: float = 3.14

# fields() accepts the dataclass itself ...
print("\n".join(str(spec) for spec in fields(Spam)))

# ... as well as an instance of it
spam = Spam()
print(fields(spam))

Field(name='a',type=<class 'int'>,default=1,default_factory=<dataclasses._MISSING_TYPE object at 0x000001B705132E10>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD)
Field(name='b',type=<class 'int'>,default=2,default_factory=<dataclasses._MISSING_TYPE object at 0x000001B705132E10>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD)
Field(name='c',type=<class 'float'>,default=3.14,default_factory=<dataclasses._MISSING_TYPE object at 0x000001B705132E10>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD)
(Field(name='a',type=<class 'int'>,default=1,default_factory=<dataclasses._MISSING_TYPE object at 0x000001B705132E10>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD), Field(name='b',type=<class 'int'>,default=2,default_factory=<dataclasses._MISSING_TYPE object at 0x000001B705132E

# asdict

In [18]:
# asdict() turns a dataclass instance into a dict of "field name: value" pairs
from dataclasses import asdict, fields

@dataclass
class Point:
    """A point with integer x and y coordinates."""

    x: int
    y: int

@dataclass
class C:
    """Holds a list of Point instances, i.e. dataclasses nested in a container."""

    my_list: list[Point]


p = Point(10, 20)
assert {"x": 10, "y": 20} == asdict(p)

# The Point instances inside the list come back as dicts too
c = C([Point(0, 0), Point(10, 20)])
assert asdict(c) == {
    "my_list": [{"x": x, "y": y} for x, y in ((0, 0), (10, 20))]
}

In [None]:
# Build a shallow copy of p as a plain dict. Only the current field values
# are copied; the dict keeps no link back to the instance.
shallow_p = {f.name: getattr(p, f.name) for f in fields(p)}
print(shallow_p)

p.x = 50
print(shallow_p)      # unchanged: assigning to p does not touch the dict

shallow_p["x"] = 99   # likewise, writing to the dict does not touch p
print(p.x)            # still 50, the value assigned directly to p above

{'x': 50, 'y': 20}
{'x': 50, 'y': 20}
50


# astuple

In [28]:
# astuple() turns a dataclass instance into a tuple of its field values
from dataclasses import astuple

assert (50, 20) == astuple(p)
# c has a single field, so the result is a 1-tuple; the Point instances inside
# its list are converted to tuples as well
assert ([(0, 0), (10, 20)],) == astuple(c)

# Shallow alternative: collect the field values as they are, without converting them
tuple(getattr(p, spec.name) for spec in fields(p))

(50, 20)

In [32]:
# Show selected attributes of each Field object of p. The loop variable is
# deliberately not called `field`: a module-level loop would rebind that name
# and hide dataclasses.field() from every cell that runs afterwards.
for field_info in fields(p):
    print(field_info.name, field_info.type, field_info.default, field_info.default_factory, sep=" - ")

x - <class 'int'> - <dataclasses._MISSING_TYPE object at 0x000002732C942BD0> - <dataclasses._MISSING_TYPE object at 0x000002732C942BD0>
y - <class 'int'> - <dataclasses._MISSING_TYPE object at 0x000002732C942BD0> - <dataclasses._MISSING_TYPE object at 0x000002732C942BD0>


# replace

In [24]:
from dataclasses import replace

@dataclass
class Foo:
    """Three integer fields, all with defaults."""

    a: int = 1
    b: int = 2
    c: int = 3

def _format_fields(obj):
    """Render every field of a dataclass instance as "name: value", comma-separated."""
    return ", ".join(f"{spec.name}: {getattr(obj, spec.name)}" for spec in fields(obj))

foo = Foo()
print(_format_fields(foo))

# replace() builds a new instance of the same class, taking foo's field values
# except for the ones overridden by keyword
new_foo = replace(foo, a=50, b=10, c=20)
print(_format_fields(new_foo))

a: 1, b: 2, c: 3
a: 50, b: 10, c: 20


# is_dataclass

In [None]:
# is_dataclass() reports whether an object is a dataclass or an instance of one
from dataclasses import is_dataclass

@dataclass
class Sample:
    """Minimal dataclass used as a value below."""

    name: str = "a dataclass"

@dataclass
class Foo:
    """Two real fields plus two un-annotated class attributes."""

    a: int = 1
    b: int = 2
    # No annotation, so c and d are ordinary class attributes rather than
    # dataclass fields.
    c = Sample     # the Sample class itself
    d = Sample()   # a single Sample instance stored on the class

print(
    *(
        is_dataclass(candidate)
        for candidate in (
            Foo,       # a dataclass (the class)
            Foo(),     # an instance of a dataclass
            Foo().c,   # attribute holding a dataclass
            Foo().d,   # attribute holding a dataclass instance
        )
    ),
    sep="\n",
)

True
True
True
True


In [29]:
def is_dataclass_instance(obj):
    """Return True only when ``obj`` is an instance of a dataclass.

    ``is_dataclass`` is true for dataclass classes as well as for their
    instances, so classes are ruled out first.
    """
    if isinstance(obj, type):
        return False
    return is_dataclass(obj)

print(
    is_dataclass_instance(Foo),     # the class itself -> False
    is_dataclass_instance(Foo()),   # an instance of it -> True
    sep="\n",
)

False
True


In [None]:
# A Foo instance is not a class, whereas Foo itself is one
print(isinstance(Foo(), type), isinstance(Foo, type), sep="\n")

# ... because the class Foo is itself an instance of `type`
print(type(Foo))

False
True
<class 'type'>


# Sentinel value

In [None]:
# A sentinel lets code tell "no value was provided" apart from an explicit None
from dataclasses import dataclass, field, fields, MISSING

@dataclass
class Example:
    """One field per way of supplying, or not supplying, a default."""

    required: int
    # MISSING marks a field as having no default, so a value for it must be
    # passed explicitly when an Example is created
    no_default: int = field(default=MISSING)
    optional: int = field(default=42)

fields(Example)

(Field(name='required',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD),
 Field(name='no_default',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD),
 Field(name='optional',type=<class 'int'>,default=42,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD))

In [16]:
# KW_ONLY: giá trị sentinel được sử dụng là một type annotation
from dataclasses import KW_ONLY
@dataclass
class Point:
    x: float
    _: KW_ONLY   # pseudo field (of type KW_ONLY); completely ignored; the name of _ is just a convention
    y: float     # keyword-only field
    z: float     # keyword-only field

p = Point(0, y=1.5, z=2.0)
# p = Point(0, 1.5, 2.0)     # TypeError
fields(Point)    # this does not list the pseudo-field _

(Field(name='x',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD),
 Field(name='y',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=True,_field_type=_FIELD),
 Field(name='z',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=True,_field_type=_FIELD))

In [None]:
# Post-init processing
# The dataclass's __init__() calls __post_init__() automatically; if
# __init__() is never called, __post_init__() is not called either.
# Typical use: initialise fields whose values depend on other fields.
from dataclasses import dataclass, field

@dataclass
class C:
    """Two inputs plus a third field derived from them."""

    a: float
    b: float
    # init=False keeps c out of the __init__() parameters, so no argument
    # is passed for it when creating an instance
    c: float = field(init=False)

    def __post_init__(self):
        """Fill in ``c`` as the sum of ``a`` and ``b``."""
        total = self.a + self.b
        self.c = total

c = C(a=1.0, b=2.0)
print(c.c)

# c = C(a=1.0, b=2.0, c=3.0)   # TypeError: c is not accepted by __init__()

3.0


In [None]:
# Inheritance with dataclass

class Rectangle:
    """Ordinary (non-dataclass) base class storing a height and a width."""

    def __init__(self, height, width):
        self.height, self.width = height, width

@dataclass
class Square(Rectangle):
    """Dataclass with a single field, ``side``, built on top of Rectangle."""

    side: float

    def __post_init__(self):
        """Initialise the Rectangle part with the side as both height and width."""
        edge = self.side
        super().__init__(edge, edge)

square = Square(side=4)
print(fields(square))   # only `side` is a field; height and width are plain attributes
print(square.width)
print(square.height)

(Field(name='side',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,default_factory=<dataclasses._MISSING_TYPE object at 0x000001D3F94D2BD0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD),)
4
4


In [26]:
# Class variables & Init-only variables