# pyprobasic

## @dataclass

### import

In [1]:
from loguru import logger
from functools import wraps
from inspect import signature, getmembers
from dataclasses import dataclass
from typing import Dict, List, Any
import sys


# Reference list of the level-specific logging methods on `logger`; they all
# share the same (_message, *args, **kwargs) call shape.
# NOTE(review): `_message`, `args` and `kwargs` are not defined in the visible
# part of this file, so these lines look like pasted signatures rather than
# runnable calls -- confirm before executing this cell.
logger.debug(_message, *args, **kwargs)
logger.info(_message, *args, **kwargs)
logger.success(_message, *args, **kwargs)
logger.warning(_message, *args, **kwargs)
logger.error(_message, *args, **kwargs)
logger.critical(_message, *args, **kwargs)

### @dataclass

In [2]:
from dataclasses import dataclass
from typing import Dict
import numpy as np
# Plain container with three optional fields; every field defaults to None.
# @dataclass derives __init__, __repr__ and __eq__ from the annotated fields.
@dataclass
class Data:
    # NOTE(review): `np.array` is a factory function, not a type; `np.ndarray`
    # is probably what was meant for `y` -- confirm before changing, because
    # the annotation is stored on the generated field.
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None

In [3]:
# Two separate instances built from the same (default) field values.
data1 = Data()
data2 = Data()
# Compare the two distinct instances (not an object with itself): the
# generated __eq__ compares field values, so the result is still True.
data1 == data2

True

In [4]:
print(data1)
data1

Data(X=None, y=None, kwargs=None)


Data(X=None, y=None, kwargs=None)

In [5]:
# unsafe_hash=True asks @dataclass to generate __hash__ from the fields, so
# instances can be used as dict keys even though they remain mutable.
@dataclass(unsafe_hash=True)
class Data:
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None


data1, data2 = Data(), Data()  # two all-default instances
data3 = Data(1, 2, 3)          # positional arguments fill X, y, kwargs in order

In [6]:
{data3:2}

{Data(X=1, y=2, kwargs=3): 2}

In [65]:
class CrossValidation:
    """Plain-class version of the cross-validation settings holder.

    Every constructor argument is stored unchanged under an attribute of the
    same name. ``outer_folds`` starts as ``None`` and ``inner_folds`` as an
    empty dict.
    """

    def __init__(
        self,
        inner_cv,
        outer_cv,
        eval_final_performance,
        test_size,
        calculate_metrics_per_fold,
        calculate_metrics_across_folds,
    ):
        # Settings, kept exactly as passed in.
        self.inner_cv = inner_cv
        self.outer_cv = outer_cv
        self.eval_final_performance = eval_final_performance
        self.test_size = test_size
        self.calculate_metrics_per_fold = calculate_metrics_per_fold
        self.calculate_metrics_across_folds = calculate_metrics_across_folds
        # Fold bookkeeping starts out empty.
        self.outer_folds = None
        self.inner_folds = {}

In [69]:
@dataclass
class CrossValidation:
    # Required fields: no default, so both must be passed to __init__.
    inner_cv: int
    outer_cv: int
    # Optional fields and their defaults.
    eval_final_performance: bool = True
    test_size: float = 0.2
    calculate_metrics_per_fold: bool = True
    calculate_metrics_across_folds: bool = False
    # No annotation, so @dataclass does not treat these two as fields: they
    # stay plain class attributes and are absent from __init__ and __repr__.
    outer_folds = None
    inner_folds = None

In [9]:
cv1 = CrossValidation()

TypeError: __init__() missing 2 required positional arguments: 'inner_cv' and 'outer_cv'

In [70]:
# cv1 and cv2 carry identical field values; cv3 differs and overrides test_size.
cv1, cv2 = CrossValidation(1, 2), CrossValidation(1, 2)
cv3 = CrossValidation(3, 2, test_size=0.5)
print(cv1)  # printed through the generated __repr__
cv3         # last expression: shown as the cell's output

CrossValidation(inner_cv=1, outer_cv=2, eval_final_performance=True, test_size=0.2, calculate_metrics_per_fold=True, calculate_metrics_across_folds=False)


CrossValidation(inner_cv=3, outer_cv=2, eval_final_performance=True, test_size=0.5, calculate_metrics_per_fold=True, calculate_metrics_across_folds=False)

In [71]:
help(cv1)

Help on CrossValidation in module __main__ object:

class CrossValidation(builtins.object)
 |  CrossValidation(inner_cv: int, outer_cv: int, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False) -> None
 |  
 |  CrossValidation(inner_cv: int, outer_cv: int, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False)
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |  
 |  __init__(self, inner_cv: int, outer_cv: int, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False) -> None
 |  
 |  __repr__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref

In [58]:
sys.version_info

sys.version_info(major=3, minor=7, micro=6, releaselevel='final', serial=0)

In [11]:
cv1 == cv2

True

In [12]:
cv1 == cv3

False

### Inspecting the class boilerplate generated by @dataclass

In [13]:
help(Data)

Help on class Data in module __main__:

class Data(builtins.object)
 |  Data(X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None) -> None
 |  
 |  Data(X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None)
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |  
 |  __hash__(self)
 |  
 |  __init__(self, X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None) -> None
 |  
 |  __repr__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  X = None
 |  
 |  __annotations__ = {'X': <class 'numpy.ndarray'>, 'kwargs': typing.Dict...
 |  
 |  __dataclas

In [14]:
getmembers(Data)

[('X', None),
 ('__annotations__',
  {'X': numpy.ndarray, 'y': <function numpy.array>, 'kwargs': typing.Dict}),
 ('__class__', type),
 ('__dataclass_fields__',
  {'X': Field(name='X',type=<class 'numpy.ndarray'>,default=None,default_factory=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
   'y': Field(name='y',type=<built-in function array>,default=None,default_factory=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
   'kwargs': Field(name='kwargs',type=typing.Dict,default=None,default_factory=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD)}),
 ('__dataclass_params__',
  _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=True,frozen=False)),
 ('__delattr__', <slot wrapper '__delattr__' of 

In [15]:
print(signature(data1.__init__))

(X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None) -> None


In [16]:
print(signature(data1.__eq__))

(other)


In [17]:
print(signature(cv1.__init__))

(inner_cv: int, outer_cv: int, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False) -> None


### Arguments for @dataclass

Pressing Shift+Tab shows the signature and the default values of all arguments of @dataclass.

In [18]:
#@dataclass

In [19]:
class Data:
    # Hand-written sketch of the ordering methods that @dataclass(order=True)
    # generates: instances are compared as tuples of their fields, in
    # declaration order, and only against instances of the same class.
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None

    #  ... default autogenerated methods, plus
    def __ge__(self, other):
        """Return True when this instance's field tuple is >= ``other``'s."""
        if other.__class__ is self.__class__:
            return (self.X, self.y, self.kwargs) >= (other.X, other.y, other.kwargs)
        # Let Python try the reflected operation or raise TypeError itself.
        return NotImplemented

    def __gt__(self, other):
        """Return True when this instance's field tuple is > ``other``'s."""
        if other.__class__ is self.__class__:
            return (self.X, self.y, self.kwargs) > (other.X, other.y, other.kwargs)
        return NotImplemented

    def __le__(self, other):
        """Return True when this instance's field tuple is <= ``other``'s."""
        if other.__class__ is self.__class__:
            return (self.X, self.y, self.kwargs) <= (other.X, other.y, other.kwargs)
        return NotImplemented

    def __lt__(self, other):
        """Return True when this instance's field tuple is < ``other``'s."""
        if other.__class__ is self.__class__:
            return (self.X, self.y, self.kwargs) < (other.X, other.y, other.kwargs)
        return NotImplemented

In [20]:
# order=True additionally generates __lt__, __le__, __gt__ and __ge__, which
# compare instances as tuples of their fields in declaration order.
@dataclass(order=True)
class Data:
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None

In [21]:
help(Data)

Help on class Data in module __main__:

class Data(builtins.object)
 |  Data(X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None) -> None
 |  
 |  Data(X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None)
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |  
 |  __ge__(self, other)
 |  
 |  __gt__(self, other)
 |  
 |  __init__(self, X: numpy.ndarray = None, y: <built-in function array> = None, kwargs: Dict = None) -> None
 |  
 |  __le__(self, other)
 |  
 |  __lt__(self, other)
 |  
 |  __repr__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  X = None
 |  
 |  _

In [22]:
data1, data2 = Data(), Data()  # two instances with equal field values
data3 = Data(1, 2, 3)
# Strict and inclusive comparisons of the two equal instances, one per line.
print(
    data1 < data2,
    data1 <= data2,
    data1 > data2,
    data1 >= data2,
    sep="\n",
)


False
True
False
True


### Explicit @property and @<name>.setter boilerplate is no longer needed

This is so much easier to read, and it is intuitive.

In [23]:
@dataclass
class Data:
    # Fields are ordinary instance attributes: they are read and assigned
    # directly, with no @property getter/setter boilerplate.
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None


d = Data()
d.kwargs  # still the default (None) at this point

In [24]:
d.kwargs = {'one':1}
d.kwargs

{'one': 1}

In [25]:
d.kwargs = 1
d.kwargs

1

Notice that type hints are not enforced at runtime: assigning an int to a field annotated as Dict raises no error.

### Using __slots__

In [26]:
@dataclass
class LoggingState:
    # Six boolean flags, one per name in __slots__. Because __slots__ lists
    # exactly the declared fields, instances carry no per-instance __dict__.
    __slots__ = [
        "debug",
        "info",
        "success",
        "warning",
        "error",
        "critical",
    ]
    debug: bool
    info: bool
    success: bool
    warning: bool
    error: bool
    critical: bool

In [27]:
logg = LoggingState(debug=False, info=False, success=False, warning=True, error=True, critical=True )

In [28]:
help(LoggingState)

Help on class LoggingState in module __main__:

class LoggingState(builtins.object)
 |  
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |  
 |  
 |  __repr__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  critical
 |  
 |  debug
 |  
 |  error
 |  
 |  info
 |  
 |  success
 |  
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __annotations__ = {'critical': <class 'bool'>, 'debug': <class 'bool'>...
 |  
 |  __dataclass_fields__ = {'critical': Field(name='critical',type=<class ...
 |  
 |  __dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,or...
 |  
 |  __hash__ = None



In [29]:
getmembers(LoggingState)

[('__annotations__',
  {'debug': bool,
   'info': bool,
   'success': bool,
   'error': bool,
   'critical': bool}),
 ('__class__', type),
 ('__dataclass_fields__',
  {'debug': Field(name='debug',type=<class 'bool'>,default=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
   'info': Field(name='info',type=<class 'bool'>,default=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
   'success': Field(name='success',type=<class 'bool'>,default=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f801cd08950>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
   'error': Field(na

In [30]:
# Every @dataclass option spelled out. repr=False and eq=False suppress the
# generated __repr__ and __eq__, so the class falls back to object's versions.
@dataclass(
    init=True,
    repr=False,
    eq=False,
    order=False,
    unsafe_hash=False,
    frozen=False,
)
class LoggingState:
    __slots__ = [
        "debug",
        "info",
        "success",
        "warning",
        "error",
        "critical",
    ]
    debug: bool
    info: bool
    success: bool
    warning: bool
    error: bool
    critical: bool

In [31]:
help(LoggingState)

Help on class LoggingState in module __main__:

class LoggingState(builtins.object)
 |  
 |  
 |  Methods defined here:
 |  
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  critical
 |  
 |  debug
 |  
 |  error
 |  
 |  info
 |  
 |  success
 |  
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __annotations__ = {'critical': <class 'bool'>, 'debug': <class 'bool'>...
 |  
 |  __dataclass_fields__ = {'critical': Field(name='critical',type=<class ...
 |  
 |  __dataclass_params__ = _DataclassParams(init=True,repr=False,eq=False,...



### Adding a method

In [32]:
@dataclass
class Data:
    X: np.ndarray = None
    y: np.array = None
    z: int = 0  # overwritten by power_args()

    def power_args(self):
        """Store ``X`` raised to the power ``y`` in ``z``."""
        self.z = pow(self.X, self.y)


d = Data(1, 2)
d.power_args()
d.z

1

In [33]:
# Fields are mutable by default: change X on the existing instance, then
# recompute z from the new value.
d.X = 5
d.power_args()
d.z

25

### Immutable Data Classes

In [91]:
# frozen=True makes instances read-only: assigning to a field after
# construction raises dataclasses.FrozenInstanceError.
@dataclass(frozen=True)
class Data:
    X: np.ndarray = 0
    y: np.array = 0
    z: int = 0


d = Data()

In [92]:
d.y = 2

FrozenInstanceError: cannot assign to field 'y'

### Post-Init Processing

In [49]:
# Demonstrates a failure: a mutable default ({}) on a dataclass field is
# rejected while the class is being created, so this cell raises ValueError
# instead of defining Data.
@dataclass
class Data():
    X: np.ndarray = None  # The field declaration: X
    y: np.array = None    # The field declaration: y
    kwargs: Dict = {}   # The field declaration: kwargs

ValueError: mutable default <class 'dict'> for field kwargs is not allowed: use default_factory

In [48]:
@dataclass
class Data:
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None  # None means "not supplied"; replaced in __post_init__

    def __post_init__(self):
        """Give the instance its own empty dict when no kwargs were passed.

        Called automatically at the end of the generated ``__init__``. A
        caller-supplied ``kwargs`` value is left untouched.
        """
        if self.kwargs is None:
            self.kwargs = {}


d = Data()
d.kwargs

{}

In [82]:
@dataclass
class CrossValidation:
    inner_cv: int = 0
    outer_cv: int = 0
    eval_final_performance: bool = True
    test_size: float = 0.2
    calculate_metrics_per_fold: bool = True
    calculate_metrics_across_folds: bool = False
    # Unannotated, so these are class-level placeholders rather than dataclass
    # fields (absent from __init__ and __repr__). The per-instance values are
    # created in __post_init__.
    outer_folds = None
    inner_folds = None

    def __post_init__(self):
        """Give every instance its own empty fold containers.

        Called automatically at the end of the generated ``__init__``.
        """
        self.outer_folds: Dict = {}
        self.inner_folds: Dict = {}

In [83]:
# cv1 and cv2 carry identical field values; cv3 differs and overrides test_size.
cv1, cv2 = CrossValidation(1, 2), CrossValidation(1, 2)
cv3 = CrossValidation(3, 2, test_size=0.5)
cv1  # last expression: shown as the cell's output


CrossValidation(inner_cv=1, outer_cv=2, eval_final_performance=True, test_size=0.2, calculate_metrics_per_fold=True, calculate_metrics_across_folds=False)

In [84]:
help(cv1)

Help on CrossValidation in module __main__ object:

class CrossValidation(builtins.object)
 |  CrossValidation(inner_cv: int = 0, outer_cv: int = 0, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False) -> None
 |  
 |  CrossValidation(inner_cv: int = 0, outer_cv: int = 0, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False)
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |  
 |  __init__(self, inner_cv: int = 0, outer_cv: int = 0, eval_final_performance: bool = True, test_size: float = 0.2, calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False) -> None
 |  
 |  __post_init__(self)
 |  
 |  __repr__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary f

### Inheritance

In [95]:
@dataclass
class Data:
    X: np.ndarray = None
    y: np.array = None
    kwargs: Dict = None  # None means "not supplied"; replaced in __post_init__

    def __post_init__(self):
        """Give the instance its own empty dict when no kwargs were passed.

        Called automatically at the end of the generated ``__init__``. A
        caller-supplied ``kwargs`` value is left untouched.
        """
        if self.kwargs is None:
            self.kwargs = {}
        
@dataclass
class Datatail(Data):
    # Fields inherited from Data come first; z is appended after them.
    z: int = 0


d = Datatail()
d  # last expression: shown as the cell's output
     

Datatail(X=None, y=None, kwargs={}, z=0)