In [None]:
# default_exp core.data

# core.data
> The basic data structures are defined here.

In [None]:
#export
from typing import Union, Optional, Any, Iterable, Callable
import os
import shutil
from abc import ABC, abstractmethod

In [None]:
#hide
import pytest

In [None]:
#export
class Observable(ABC):
    """Minimal observer-pattern base: collects callables and invokes them on demand."""

    def __init__(self) -> None:
        # Callbacks are kept in registration order; that is also the call order.
        self._callbacks = []

    def register_callback(self, callback: Callable) -> None:
        """Add ``callback`` to the list of observers.

        The callback is later called with this observable as its only argument.
        """
        self._callbacks.append(callback)

    def trigger_callbacks(self) -> None:
        """Call every registered callback, passing this instance to each."""
        for notify in self._callbacks:
            notify(self)

Implements a basic observer pattern with a register function and a trigger function.

In [None]:
#hide
# Smoke test: a registered callback must run exactly once per trigger.
test_callback_register = []
test_observable = Observable()
test_observable.register_callback(
    lambda _source: test_callback_register.append(0)
)
test_observable.trigger_callbacks()
assert len(test_callback_register) == 1

In [None]:
#export
class ObservableList(Observable):
    """List wrapper that notifies registered callbacks whenever it is modified.

    The wrapped list is stored by reference (it is not copied) and can be
    accessed through the ``list`` attribute. Every method that changes the
    content (assigning ``list``, item assignment, ``append``, ``remove``,
    ``insert``, ``pop``, ``extend``, ``clear``, ``reverse`` and ``sort``) calls
    ``trigger_callbacks`` after the change. Read-only operations do not.
    """
    def __init__(self, observable_list: list):
        self._list = observable_list
        super().__init__()

    @property
    def list(self):
        """The internal list representation."""
        return self._list

    @list.setter
    def list(self, value: Any):
        # Replacing the whole list counts as a change and notifies observers.
        self._list = value
        self.trigger_callbacks()

    def __repr__(self):
        return repr(self._list)

    def __iter__(self):
        yield from self._list

    def __len__(self):
        return len(self._list)

    def __getitem__(self, index: int):
        return self._list[index]

    def __setitem__(self, index: int, value: Any):
        self._list[index] = value
        self.trigger_callbacks()

    def append(self, item: Any):
        """Append ``item`` to the end of the list and notify observers."""
        self._list.append(item)
        self.trigger_callbacks()

    def remove(self, item: Any):
        """Remove the first occurrence of ``item`` and notify observers.

        Raises ``ValueError`` (from the wrapped list) if ``item`` is absent; no
        callback is triggered in that case.
        """
        self._list.remove(item)
        self.trigger_callbacks()

    def insert(self, index: int, item: Any):
        """Insert ``item`` before ``index`` and notify observers."""
        self._list.insert(index, item)
        self.trigger_callbacks()

    def pop(self, index: int = -1):
        """Remove and return the item at ``index`` (default: last), then notify observers."""
        popped_item = self._list.pop(index)
        self.trigger_callbacks()
        return popped_item

    def extend(self, iterable: Iterable):
        """Append all items of ``iterable`` and notify observers once."""
        self._list.extend(iterable)
        self.trigger_callbacks()

    def clear(self):
        """Drop all items and notify observers.

        The internal list is replaced by a new empty list instead of being
        cleared in place, so a list object that was handed to the constructor
        or the ``list`` setter keeps its content.
        """
        self._list = []
        # Previously missing: clearing is a change, so observers must be told.
        self.trigger_callbacks()

    def count(self, item):
        """Return how often ``item`` occurs in the list."""
        return self._list.count(item)

    def index(self, item, start=0, stop=9223372036854775807):
        """Return the first index of ``item`` within ``[start, stop)``.

        Raises ``ValueError`` (from the wrapped list) if ``item`` is not found.
        """
        return self._list.index(item, start, stop)

    def reverse(self):
        """Reverse the list in place and notify observers."""
        self._list.reverse()
        # Previously missing: reordering is a change, so observers must be told.
        self.trigger_callbacks()

    def sort(self, key=float, reverse=False):
        """Sort the list in place and notify observers.

        Note that ``key`` defaults to ``float`` (not ``None`` as for the
        built-in list), so with the default every item must be convertible
        with ``float()``. Pass ``key=None`` for the natural ordering of the
        items or any other callable for a custom ordering.
        """
        self._list.sort(key=key, reverse=reverse)
        # Previously missing: reordering is a change, so observers must be told.
        self.trigger_callbacks()

Implements a list with the observer pattern. If the list changes, all registered callbacks will be executed.

In [None]:
#hide
# Each mutating call below must trigger exactly one additional callback.
# Checking only `call_register[-1]` cannot detect a missing notification once
# the first one has happened, so the number of recorded calls is asserted too.
obs_list = ObservableList([])
call_register = []
obs_list.register_callback(lambda x: call_register.append(x))

obs_list.list = [1]
assert obs_list.list == [1]
assert obs_list.__repr__() == "[1]"
assert len(call_register) == 1
assert call_register[-1] is obs_list
assert obs_list.count(1) == 1
assert obs_list.index(1) == 0
# read-only operations must not notify
assert len(call_register) == 1

obs_list.append(2)
assert obs_list.list == [1, 2]
assert len(call_register) == 2
assert call_register[-1] is obs_list

assert obs_list.pop() == 2
assert obs_list.list == [1]
assert len(call_register) == 3
assert call_register[-1] is obs_list

obs_list.extend([3,4,5])
assert obs_list.list == [1, 3, 4, 5]
assert len(call_register) == 4
assert call_register[-1] is obs_list

obs_list.insert(2, 4)
assert obs_list.list == [1, 3, 4, 4, 5]
assert len(call_register) == 5
assert call_register[-1] is obs_list

obs_list.remove(4)
assert obs_list.list == [1, 3, 4, 5]
assert len(call_register) == 6
assert call_register[-1] is obs_list

obs_list[0] = 0
assert obs_list[0] == 0
assert len(call_register) == 7
assert call_register[-1] is obs_list

# Bypass the setter on purpose: only the resulting order is checked below.
obs_list._list = [1,3,2]
obs_list.sort()
assert obs_list._list == [1,2,3]

obs_list.reverse()
assert obs_list._list == [3,2,1]

obs_list.clear()
assert obs_list._list == []

In [None]:
#export
class DatasetDescriptor(ABC):
    """Abstract base class for lazily computed, cached descriptions of a dataset.

    The value is stored on the owning instance under ``'_' + <attribute name>``.
    Reading the attribute computes the value with `calculate_description` if
    nothing is cached (cache is ``None`` or missing) and returns the cached
    value otherwise. Assigning ``None`` invalidates the cache; assigning
    anything else raises ``ValueError``.
    """
    def __set_name__(self, owner, name):
        """Register the descriptor on ``owner`` and derive the cache attribute name."""
        # Each owner class gets its own registry, seeded with whatever it
        # inherits. Appending to an inherited list would leak this descriptor
        # into the base class and, through it, into unrelated sibling classes.
        if '_descriptors' not in vars(owner):
            owner._descriptors = list(getattr(owner, '_descriptors', ()))
        owner._descriptors.append(self)
        self.private_name = '_' + name

    def __get__(self, obj, objtype=None):
        """Return the cached description of ``obj``, computing it first if needed."""
        if obj is None:
            # Accessed on the class itself: hand back the descriptor object
            # instead of failing on ``getattr(None, ...)``.
            return self
        # A cache slot that was never initialised is treated like a reset one.
        value = getattr(obj, self.private_name, None)
        if value is None:
            value = self.calculate_description(obj)
            setattr(obj, self.private_name, value)
        return value

    def __set__(self, obj, value):
        """Reset the cached value; only ``None`` is accepted.

        Raises:
            ValueError: if ``value`` is anything other than ``None``.
        """
        if value is None:
            setattr(obj, self.private_name, value)
        else:
            raise ValueError("Attribute can only be set to None externaly.")

    @abstractmethod
    def calculate_description(self, obj):
        """Compute and return the description for the instance ``obj``.

        A return value of ``None`` is not cached, i.e. the computation is
        repeated on the next access.
        """
        pass

Abstract base class for dataset descriptors. Inherited classes are required to implement a `calculate_description` function that calculates the specific stats about a dataset one wants. For more information on how they are used, see `GenericDataset`.

In [None]:
#export
class StringDescriptor:
    """Descriptor that stores its value on the instance under ``'_' + <attribute name>``.

    No validation or conversion is performed; the value is stored and returned
    as given.
    """
    def __set_name__(self, owner, name):
        """Derive the name of the backing instance attribute."""
        self.private_name = '_' + name

    def __get__(self, obj, objtype=None):
        """Return the stored value, or the descriptor itself on class access.

        Raises ``AttributeError`` if the value is read before it was ever set
        on ``obj``.
        """
        if obj is None:
            # Accessed on the class (e.g. by introspection tools): return the
            # descriptor instead of failing on ``getattr(None, ...)``.
            return self
        return getattr(obj, self.private_name)

    def __set__(self, obj, value):
        """Store ``value`` on ``obj``."""
        setattr(obj, self.private_name, value)

Descriptor for strings. Mainly used for the name and description of a dataset (see `GenericDataset`).

In [None]:
#export
class GenericDataset:
    """Generic dataset consisting of data plus an optional name and description.

    The data is kept in the attribute ``base_data``. Descriptors registered in
    ``_descriptors`` can all be reset at once with `reset_infered_data`.
    """
    # Registry of descriptors that `reset_infered_data` resets.
    _descriptors = []

    name = StringDescriptor()
    description = StringDescriptor()

    def __init__(self, base_data, name: Optional[str] = None, description: Optional[str] = None):
        # Targets are assigned left to right: base_data, then name, then description.
        self.base_data, self.name, self.description = base_data, name, description
        super().__init__()

    def reset_infered_data(self, new_data=None):
        """Set every registered descriptor on this instance back to ``None``.

        ``new_data`` is accepted but ignored, so the method can be called with
        one positional argument (the original note says this is for
        compatibility with panel).
        """
        for registered in self._descriptors:
            registered.__set__(self, None)

Generic base class for datasets that implements the basic control mechanisms. The idea behind the control mechanism is that changes to the underlying data are propagated to the inferred data
without explicit calls after each change. To achieve this, information that is inferred from the underlying data needs to be defined as a descriptor. The easiest way is to inherit from `DatasetDescriptor` and define the `calculate_description` method. To propagate changes in the underlying data, the `reset_infered_data` function is provided. The `base_data` should be of a type that implements the observer pattern; then the `reset_infered_data` method can simply be registered as a callback.

In [None]:
#hide
# Both string attributes must round-trip the value assigned to them.
test_generic_dataset = GenericDataset([])
for _attribute, _expected in (("name", "Test"), ("description", "A short description")):
    setattr(test_generic_dataset, _attribute, _expected)
    assert getattr(test_generic_dataset, _attribute) == _expected

## Example
Here we create a descriptor that returns a dict with the name and description of the dataset, plus its number of data points. Then we create a `TestDataset` class that uses the new descriptor.

In [None]:
class DatasetStatsDescriptor(DatasetDescriptor):
    """Example descriptor summarising a dataset as a small dict."""

    def calculate_description(self, obj):
        """Return name, description and number of data points of ``obj``."""
        dataset_name = obj.name
        dataset_description = obj.description
        size = len(obj.base_data)
        return {
            "name": dataset_name,
            "description": dataset_description,
            "num_data_points": size,
        }

In [None]:
class TestDataset(GenericDataset):
    """Example dataset whose ``stats`` are recomputed after the data changes."""
    stats = DatasetStatsDescriptor()

    def __init__(self):
        super().__init__(
            ObservableList([1, 2, 3]),
            name="name",
            description="description",
        )
        # Initialise the cache slot so the first read computes the stats.
        self.stats = None
        # Every change of the observable data resets the cached descriptors.
        self.base_data.register_callback(self.reset_infered_data)

In [None]:
test_dataset = TestDataset()
stats_before = test_dataset.stats
print("Stats before update: ", stats_before)
# Appending goes through the observable list, which resets the cached stats,
# so the next read recomputes them.
test_dataset.base_data.append(4)
stats_after = test_dataset.stats
print("Stats after update: ", stats_after)

In [None]:
#hide
# Assigning anything but None to a dataset descriptor must be rejected.
with pytest.raises(ValueError):
    setattr(test_dataset, "stats", "test")