A dataclass is simply a code generator that lets us define custom classes using a more concise syntax, and that generates what is often referred to as "boilerplate" code - code that is repetitive and basically always works the same way. Essentially, dataclass is a class decorator that either monkey-patches an existing class or, when slots are involved, generates a new class based on the old one, with extra functionality injected.

python example

In [1]:
class Circle:
    """A circle described by the values ``x``, ``y`` and ``radius``."""

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        # Keep the constructor arguments as plain, writable attributes.
        self.x, self.y = x, y
        self.radius = radius

In [2]:
c = Circle()
c



<__main__.Circle at 0x221dd941c90>

Let's add some functionality that we usually add (or should add) to our class.

First, let's have a custom __repr__

In [3]:
class Circle:
    """A circle with a readable, constructor-like string representation."""

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        self.x, self.y, self.radius = x, y, radius

    def __repr__(self):
        """Return ``ClassName(x=..., y=..., radius=...)`` for this instance."""
        # Use the runtime class so subclasses report their own name.
        cls_name = self.__class__.__qualname__
        return f"{cls_name}(x={self.x}, y={self.y}, radius={self.radius})"

In [4]:
c1 = Circle(0, 0, 1)
c1



Circle(x=0, y=0, radius=1)

Now let's see how we can do the same thing using a dataclass:

In [5]:
from dataclasses import dataclass

In [6]:
# With its default options, @dataclass generates __init__, __repr__ and __eq__
# from the annotated fields declared in the class body.
@dataclass
class CircleD:
    # Each annotated name becomes an __init__ parameter, in this order, using
    # the assigned value as its default.
    x: int = 0
    y: int = 0
    radius: int = 1  # the only field whose default is not 0

In [7]:
c2 = CircleD()
c2

CircleD(x=0, y=0, radius=1)

In [10]:
c3 = CircleD(1, 1, 5)
c4 = CircleD(1, 1, 5)
c3 == c4


True

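Back to plain Python: to get the equality comparison that the dataclass generated for us, we have to write __eq__ ourselves.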

In [8]:
class Circle:
    """A circle that compares equal to circles holding the same values."""

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        self.x, self.y, self.radius = x, y, radius

    def __repr__(self):
        """Return ``ClassName(x=..., y=..., radius=...)`` for this instance."""
        cls_name = self.__class__.__qualname__
        return f"{cls_name}(x={self.x}, y={self.y}, radius={self.radius})"

    def __eq__(self, other):
        """Compare ``(x, y, radius)`` with another instance of the same class.

        Returns ``NotImplemented`` for any other class so that Python can try
        the reflected comparison or fall back to identity.
        """
        if self.__class__ != other.__class__:
            return NotImplemented
        mine = (self.x, self.y, self.radius)
        theirs = (other.x, other.y, other.radius)
        return mine == theirs

In [9]:
c1 = Circle(0, 0, 1)
c2 = Circle(0, 0, 1)

c1 is c2, c1 == c2

(False, True)

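Hash for plain Python: a class that defines __eq__ without also defining __hash__ becomes unhashable, so we add a __hash__ built from the same values that __eq__ compares.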

In [11]:
class Circle:
    """A circle that supports value-based equality and hashing."""

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        self.x, self.y, self.radius = x, y, radius

    def _key(self):
        """Return the ``(x, y, radius)`` tuple used for equality and hashing."""
        return self.x, self.y, self.radius

    def __repr__(self):
        """Return ``ClassName(x=..., y=..., radius=...)`` for this instance."""
        cls_name = self.__class__.__qualname__
        return f"{cls_name}(x={self.x}, y={self.y}, radius={self.radius})"

    def __eq__(self, other):
        """Compare by value with same-class instances; defer otherwise."""
        if self.__class__ != other.__class__:
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        """Hash the same tuple that ``__eq__`` compares."""
        return hash(self._key())

In [12]:
c1 = Circle(0, 0, 1)
c2 = Circle(0, 0, 1)
c1 == c2, hash(c1) == hash(c2)


(True, True)


Immutability

An object's hash must not change while it is stored in a set or used as a dictionary key, so to make our custom class implementation better we need to make the attributes used in the hash - x, y, and radius - read-only.

Let's add even more boilerplate code to our class:


In [13]:
class Circle:
    """A hashable circle whose ``x``, ``y`` and ``radius`` are read-only."""

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        # Values live in underscore-prefixed attributes; the public names are
        # getter-only properties, so assigning to them raises AttributeError.
        self._x, self._y, self._radius = x, y, radius

    @property
    def x(self):
        """Read-only view of ``_x``."""
        return self._x

    @property
    def y(self):
        """Read-only view of ``_y``."""
        return self._y

    @property
    def radius(self):
        """Read-only view of ``_radius``."""
        return self._radius

    def __repr__(self):
        """Return ``ClassName(x=..., y=..., radius=...)`` for this instance."""
        cls_name = self.__class__.__qualname__
        return f"{cls_name}(x={self.x}, y={self.y}, radius={self.radius})"

    def __eq__(self, other):
        """Compare by value with same-class instances; defer otherwise."""
        if self.__class__ != other.__class__:
            return NotImplemented
        mine = (self.x, self.y, self.radius)
        theirs = (other.x, other.y, other.radius)
        return mine == theirs

    def __hash__(self):
        """Hash the same tuple that ``__eq__`` compares."""
        values = (self.x, self.y, self.radius)
        return hash(values)

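The immutable dataclass: frozen=True makes the fields read-only after construction and, together with the default eq=True, also gives us a generated __hash__.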

In [14]:
# frozen=True: assigning to a field after construction raises an error.
# Combined with the default eq=True, the decorator also generates __hash__,
# so instances can be used in sets and as dictionary keys.
@dataclass(frozen=True)
class CircleD:
    # Annotated fields, in __init__ parameter order, each with its default.
    x: int = 0
    y: int = 0
    radius: int = 1

c3 = CircleD()
c4 = CircleD(1, 1, 1)
c5 = CircleD()
c3, c4, c5



(CircleD(x=0, y=0, radius=1),
 CircleD(x=1, y=1, radius=1),
 CircleD(x=0, y=0, radius=1))

In [15]:
c3 == c5, c4 == c5

(True, False)

In [16]:
# The previous cell already checked equality; here we check the hashes.
# Equal frozen instances must hash equally, which is what makes them usable
# in sets and as dictionary keys.
hash(c3) == hash(c5), hash(c4) == hash(c5)

(True, False)

Adding __lt__, __le__, __gt__, __ge__ to plain python 

In [3]:
class Circle:
    """A hashable, fully orderable circle with read-only attributes.

    Instances compare (``==``, ``<``, ``<=``, ``>``, ``>=``) by the tuple
    ``(x, y, radius)``. Every comparison returns ``NotImplemented`` when the
    other operand is not of exactly the same class, so Python can try the
    reflected operation or raise ``TypeError`` itself.
    """

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        # Values live in underscore-prefixed attributes; the public names are
        # getter-only properties, so assigning to them raises AttributeError.
        self._x = x
        self._y = y
        self._radius = radius

    @property
    def x(self):
        """Read-only view of ``_x``."""
        return self._x

    @property
    def y(self):
        """Read-only view of ``_y``."""
        return self._y

    @property
    def radius(self):
        """Read-only view of ``_radius``."""
        return self._radius

    def _key(self):
        """Return the ``(x, y, radius)`` tuple shared by all comparisons."""
        return self.x, self.y, self.radius

    def __repr__(self):
        """Return ``ClassName(x=..., y=..., radius=...)`` for this instance."""
        return f"{self.__class__.__qualname__}(x={self.x}, y={self.y}, radius={self.radius})"

    def __eq__(self, other):
        """Return whether both circles hold the same ``(x, y, radius)``."""
        if self.__class__ == other.__class__:
            return self._key() == other._key()
        return NotImplemented

    def __hash__(self):
        """Hash the same tuple that ``__eq__`` compares."""
        return hash(self._key())

    def __lt__(self, other):
        """Return whether this circle's tuple sorts before ``other``'s."""
        if self.__class__ == other.__class__:
            return self._key() < other._key()
        return NotImplemented

    # Previously only __lt__ existed. ``>`` still worked through the
    # reflected __lt__, but ``<=`` and ``>=`` raised TypeError. The three
    # methods below complete the set of ordering operators.
    def __le__(self, other):
        """Return whether this circle's tuple sorts before or equal to ``other``'s."""
        if self.__class__ == other.__class__:
            return self._key() <= other._key()
        return NotImplemented

    def __gt__(self, other):
        """Return whether this circle's tuple sorts after ``other``'s."""
        if self.__class__ == other.__class__:
            return self._key() > other._key()
        return NotImplemented

    def __ge__(self, other):
        """Return whether this circle's tuple sorts after or equal to ``other``'s."""
        if self.__class__ == other.__class__:
            return self._key() >= other._key()
        return NotImplemented

In [4]:
c1 = Circle(0, 0, 1)
c2 = Circle(1, 1, 1)
c1 < c2



True

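The dataclass implementation: order=True generates __lt__, __le__, __gt__, and __ge__ for us, comparing instances as tuples of their fields in definition order.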

In [6]:
from dataclasses import dataclass
# frozen=True makes instances read-only; order=True additionally generates
# __lt__, __le__, __gt__ and __ge__, which compare instances as tuples of
# their fields in the order the fields are declared below.
@dataclass(frozen=True, order=True)
class CircleD:
    # Field order matters here: it is also the comparison order.
    x: int = 0
    y: int = 0
    radius: int = 1

In [7]:
c1 = CircleD()
c2 = CircleD(1, 1, 1)

c1 < c2, c1 <= c2, c2 > c1, c2 >= c1


(True, True, True, True)

Dataclasses Serialization

In [8]:
from dataclasses import asdict, astuple

In [9]:
c1 = CircleD()

asdict(c1)



{'x': 0, 'y': 0, 'radius': 1}

In [10]:
astuple(c1)

(0, 0, 1)

If we wanted something similar in our custom class, we would have to write that code ourselves.

In [11]:
from functools import total_ordering


In [12]:
@total_ordering
class Circle:
    """A hashable, orderable circle that can export itself as dict or tuple.

    Only ``__eq__`` and ``__lt__`` are written by hand; ``total_ordering``
    derives the remaining ordering operators from them.
    """

    def __init__(self, x: int = 0, y: int = 0, radius: int = 1):
        # Values live in underscore-prefixed attributes; the public names are
        # getter-only properties.
        self._x, self._y, self._radius = x, y, radius

    @property
    def x(self):
        """Read-only view of ``_x``."""
        return self._x

    @property
    def y(self):
        """Read-only view of ``_y``."""
        return self._y

    @property
    def radius(self):
        """Read-only view of ``_radius``."""
        return self._radius

    def __repr__(self):
        """Return ``ClassName(x=..., y=..., radius=...)`` for this instance."""
        cls_name = self.__class__.__qualname__
        return f"{cls_name}(x={self.x}, y={self.y}, radius={self.radius})"

    def __eq__(self, other):
        """Compare by value with same-class instances; defer otherwise."""
        if self.__class__ != other.__class__:
            return NotImplemented
        mine = (self.x, self.y, self.radius)
        theirs = (other.x, other.y, other.radius)
        return mine == theirs

    def __hash__(self):
        """Hash the same tuple that ``__eq__`` compares."""
        values = (self.x, self.y, self.radius)
        return hash(values)

    def __lt__(self, other):
        """Order same-class instances by ``(x, y, radius)``; defer otherwise."""
        if self.__class__ != other.__class__:
            return NotImplemented
        mine = (self.x, self.y, self.radius)
        theirs = (other.x, other.y, other.radius)
        return mine < theirs

    def asdict(self):
        """Return the values as a dict keyed ``'x'``, ``'y'``, ``'radius'``."""
        return dict(x=self.x, y=self.y, radius=self.radius)

    def astuple(self):
        """Return the values as an ``(x, y, radius)`` tuple."""
        return (self.x, self.y, self.radius)

In [13]:
c1 = Circle()
c1.asdict()

{'x': 0, 'y': 0, 'radius': 1}

In [14]:
c1.astuple()

(0, 0, 1)

Adding Methods and Properties to Dataclasses

In [18]:
from math import pi

@dataclass(frozen=True, order=True)
class CircleD:
    """Immutable, orderable circle that also exposes derived measurements."""

    x: int = 0
    y: int = 0
    radius: int = 1

    @property
    def area(self):
        """Area computed as ``pi * radius ** 2``."""
        radius_squared = self.radius ** 2
        return pi * radius_squared

    def circumference(self):
        """Return the circumference, computed as ``2 * pi * radius``."""
        two_pi = 2 * pi
        return two_pi * self.radius

c = CircleD()
c.area, c.circumference()


(3.141592653589793, 6.283185307179586)

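Overriding in a dataclass: to use a custom ordering we leave out order=True (the dataclass decorator raises a TypeError if the class already defines one of the ordering methods it is asked to generate), write our own __lt__, and let functools.total_ordering derive the remaining comparisons.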

In [21]:
from math import pi, dist
@total_ordering
@dataclass(frozen=True)
class CircleD:
    """Immutable circle ordered by the distance of ``(x, y)`` from the origin.

    ``__eq__`` is the dataclass-generated one and compares all fields;
    ``__lt__`` is hand-written and ``total_ordering`` derives the other
    ordering operators from the two.
    """

    x: int = 0
    y: int = 0
    radius: int = 1

    @property
    def area(self):
        """Area computed as ``pi * radius ** 2``."""
        radius_squared = self.radius ** 2
        return pi * radius_squared

    def circumference(self):
        """Return the circumference, computed as ``2 * pi * radius``."""
        two_pi = 2 * pi
        return two_pi * self.radius

    def __lt__(self, other):
        """Return whether this circle's ``(x, y)`` is closer to ``(0, 0)``.

        ``radius`` plays no part in the ordering. Returns ``NotImplemented``
        when ``other`` is not of exactly the same class.
        """
        if self.__class__ != other.__class__:
            return NotImplemented
        origin = (0, 0)
        own_distance = dist(origin, (self.x, self.y))
        other_distance = dist(origin, (other.x, other.y))
        return own_distance < other_distance

In [22]:
c1 = CircleD(2, 2, 10)
c2 = CircleD(3, 3, 100)

c1 <= c2

True

If we wanted to make all the arguments in our __init__ keyword-only arguments, it's even simpler:

In [25]:
# kw_only=True (available from Python 3.10) makes every parameter of the
# generated __init__ keyword-only, so CircleD(0, 0, 1) is rejected and
# callers must write CircleD(x=0, y=0, radius=1).
@dataclass(frozen=True, order=True, kw_only=True)
class CircleD:
    # Annotated fields with their defaults; all must be passed by name.
    x: int = 0
    y: int = 0
    radius: int = 1

c = CircleD(x=0, y=0, radius=1)
c


CircleD(x=0, y=0, radius=1)