Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove add_validators and specify validators in class definition #507

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 14 additions & 20 deletions signac/synced_collections/backends/collection_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os
import uuid
import warnings
from typing import Tuple
from typing import Callable, Sequence, Tuple

from .. import SyncedCollection, SyncedDict, SyncedList
from ..buffers.memory_buffered_collection import SharedMemoryFileBufferedCollection
Expand Down Expand Up @@ -120,6 +120,15 @@ class JSONCollection(SyncedCollection):
_backend = __name__ # type: ignore
_supports_threading = True

# The order in which these validators are added is important, because
# validators are called in sequence and _convert_key_to_str will ensure that
# valid non-str keys are converted to strings before json_format_validator is
# called. This ordering is an implementation detail that we should not rely on
# in the future; however, the _convert_key_to_str validator will be removed in
# signac 2.0, so this is OK (that validator modifies the data in place,
# which is unsupported behavior that will be removed in signac 2.0 as well).
_validators: Sequence[Callable] = (_convert_key_to_str, json_format_validator)

def __init__(self, filename=None, write_concern=False, *args, **kwargs):
# The `_filename` attribute _must_ be defined prior to calling the
# superclass constructors because the filename defines the `_lock_id`
Expand Down Expand Up @@ -177,16 +186,6 @@ def _lock_id(self):
return self._filename


# The order in which these validators are added is important, because
# validators are called in sequence and _convert_key_to_str will ensure that
# valid non-str keys are converted to strings before json_format_validator is
# called. This ordering is an implementation detail that we should not rely on
# in the future; however, the _convert_key_to_str validator will be removed in
# signac 2.0, so this is OK (that validator modifies the data in place,
# which is unsupported behavior that will be removed in signac 2.0 as well).
JSONCollection.add_validator(_convert_key_to_str, json_format_validator)


# These are the common protected keys used by all JSONDict types.
_JSONDICT_PROTECTED_KEYS = (
# These are all protected keys that are inherited from data type classes.
Expand All @@ -197,6 +196,7 @@ def _lock_id(self):
"_sync",
"_root",
"_validators",
"_all_validators",
"_load_and_save",
"_suspend_sync",
"_supports_threading",
Expand Down Expand Up @@ -533,9 +533,7 @@ class JSONAttrDict(JSONDict, AttrDict):
"""

_backend = __name__ + ".attr" # type: ignore


JSONAttrDict.add_validator(no_dot_in_key)
_validators = (no_dot_in_key,)


class JSONAttrList(JSONList):
Expand All @@ -548,9 +546,7 @@ class BufferedJSONAttrDict(BufferedJSONDict, AttrDict):
"""A buffered :class:`JSONAttrDict`."""

_backend = __name__ + ".buffered_attr" # type: ignore


BufferedJSONAttrDict.add_validator(no_dot_in_key)
_validators = (no_dot_in_key,)


class BufferedJSONAttrList(BufferedJSONList):
Expand All @@ -563,9 +559,7 @@ class MemoryBufferedJSONAttrDict(MemoryBufferedJSONDict, AttrDict):
"""A buffered :class:`JSONAttrDict`."""

_backend = __name__ + ".memory_buffered_attr" # type: ignore


MemoryBufferedJSONAttrDict.add_validator(no_dot_in_key)
_validators = (no_dot_in_key,)


class MemoryBufferedJSONAttrList(MemoryBufferedJSONList):
Expand Down
18 changes: 8 additions & 10 deletions signac/synced_collections/backends/collection_mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class MongoDBCollection(SyncedCollection):

_backend = __name__ # type: ignore

# MongoDB uses BSON, which is not exactly JSON but is close enough that
# JSON-validation is reasonably appropriate. We could generalize this to do
# proper BSON validation if we find that the discrepancies (for instance, the
# supported integer data types differ) are too severe.
_validators = (json_format_validator,)

def __init__(self, collection=None, uid=None, parent=None, *args, **kwargs):
super().__init__(parent=parent, **kwargs)
if not MONGO:
Expand Down Expand Up @@ -107,13 +113,6 @@ def __deepcopy__(self, memo):
raise TypeError("MongoDBCollection does not support deepcopying.")


# MongoDB uses BSON, which is not exactly JSON but is close enough that
# JSON-validation is reasonably appropriate. We could generalize this to do
# proper BSON validation if we find that the discrepancies (for instance, the
# supported integer data types differ) are too severe.
MongoDBCollection.add_validator(json_format_validator)


class MongoDBDict(MongoDBCollection, SyncedDict):
r"""A dict-like data structure that synchronizes with a document in a MongoDB collection.

Expand Down Expand Up @@ -157,6 +156,8 @@ class MongoDBDict(MongoDBCollection, SyncedDict):

"""

_validators = (require_string_key,)

def __init__(
self, collection=None, uid=None, data=None, parent=None, *args, **kwargs
):
Expand All @@ -165,9 +166,6 @@ def __init__(
)


MongoDBDict.add_validator(require_string_key)


class MongoDBList(MongoDBCollection, SyncedList):
r"""A list-like data structure that synchronizes with a document in a MongoDB collection.

Expand Down
15 changes: 5 additions & 10 deletions signac/synced_collections/backends/collection_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ class RedisCollection(SyncedCollection):

_backend = __name__ # type: ignore

# Redis collection relies on JSON-serialization for the data.
_validators = (json_format_validator,)

def __init__(self, client=None, key=None, *args, **kwargs):
super().__init__(**kwargs)
self._client = client
Expand Down Expand Up @@ -73,10 +76,6 @@ def __deepcopy__(self, memo):
raise TypeError("RedisCollection does not support deepcopying.")


# Redis collection relies on JSON-serialization for the data.
RedisCollection.add_validator(json_format_validator)


class RedisDict(RedisCollection, SyncedDict):
r"""A dict-like data structure that synchronizes with a persistent Redis database.

Expand Down Expand Up @@ -119,18 +118,14 @@ class RedisDict(RedisCollection, SyncedDict):

"""

_validators = (require_string_key,)

def __init__(self, client=None, key=None, data=None, parent=None, *args, **kwargs):
super().__init__(
client=client, key=key, data=data, parent=parent, *args, **kwargs
)


# TODO: This restriction actually may not be necessary, Redis can handle more
# generic data types easily. However, for now it is easier to manage a uniform
# set of restrictions across backends and relax this later.
RedisDict.add_validator(require_string_key)


class RedisList(RedisCollection, SyncedList):
r"""A list-like data structure that synchronizes with a persistent Redis database.

Expand Down
5 changes: 2 additions & 3 deletions signac/synced_collections/backends/collection_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,14 @@ class ZarrDict(ZarrCollection, SyncedDict):

"""

_validators = (require_string_key,)

def __init__(self, group=None, name=None, data=None, parent=None, *args, **kwargs):
super().__init__(
group=group, name=name, data=data, parent=parent, *args, **kwargs
)


ZarrDict.add_validator(require_string_key)


class ZarrList(ZarrCollection, SyncedList):
r"""A list-like data structure that synchronizes with a Zarr group.

Expand Down
102 changes: 66 additions & 36 deletions signac/synced_collections/data_types/synced_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections.abc import Collection
from inspect import isabstract
from threading import RLock
from typing import Any, Callable, DefaultDict, List
from typing import Any, DefaultDict, List

from ..numpy_utils import _convert_numpy
from ..utils import AbstractTypeResolver, _CounterContext, _NullContext
Expand Down Expand Up @@ -88,12 +88,34 @@ class SyncedCollection(Collection):
a given group should be interchangeable. For instance, a dict-like SyncedCollection
can be combined equally easily with JSON, MongoDB, or SQL backends.

To fully support the restrictions and requirements of particular backends,
arbitrary validators may be added to different subclasses. Validators are
callables that accept different data types as input and raise Exceptions if the
data does not conform to the requirements of a particular backend. For
example, a JSON validator would raise Exceptions if it detected non-string
keys in a dict. Validators should have no side effects.
**Validation**

Due to the restrictions of a particular backend or the needs of a particular
application, synced collections may need to restrict the data that they can
store. Validators provide a standardized mechanism for this. A validator is
a callable that parses any data added to a :class:`SyncedCollection` and
raises an `Exception` if any invalid data is provided. Validators cannot
modify the data and should have no side effects. They are purely provided
as a mechanism to reject invalid data. For example, a JSON validator would
raise Exceptions if it detected non-string keys in a dict.

Since :class:`SyncedCollection` is designed for extensive usage of
inheritance, validators may be inherited by subclasses. There are two attributes
that subclasses of :class:`SyncedCollection` can define to control the
validators used:
- ``_validators``: A sequence of callables that will be inherited by all
subclasses.
- ``_all_validators``: A sequence of callables that will be used to
validate this class, and this class alone.

When a :class:`SyncedCollection` subclass is initialized (note that this
is at *class* definition time, not when instances are created), its
:meth:`_register_validators` method will be called. If this class defines
an ``_all_validators`` attribute, this set of validators will be used by all
instances of this class. Otherwise, :meth:`_register_validators` will traverse
the MRO and collect the ``_validators`` attributes from all parents of a class,
and store these in the ``_all_validators`` attribute for the class.


.. note::

Expand All @@ -107,18 +129,24 @@ class SyncedCollection(Collection):

**Thread safety**

Whether or not SyncedCollection objects are thread-safe depends on the
Whether or not :class:`SyncedCollection` objects are thread-safe depends on the
implementation of the backend. Thread-safety of SyncedCollection objects
is predicated on backends providing an atomic write operation. All concrete
collection types use mutexes to guard against concurrent write operations,
while allowing read operations to happen freely. The validity of this mode
of access depends on the write operations of a SyncedCollection being
atomic, specifically the :meth:`~._save_to_resource` method.
atomic, specifically the :meth:`~._save_to_resource` method. Whether or not
a particular subclass of :class:`SyncedCollection` is thread-safe should be
indicated by that subclass setting the ``_supports_threading`` class variable
to ``True``. This variable is set to ``False`` by :class:`SyncedCollection`,
so subclasses must explicitly opt-in to support threading by setting this
variable to ``True``.

Backends that support multithreaded execution will have multithreaded
support turned on by default. This support can be enabled or disabled using
the :meth:`enable_multithreading` and :meth:`disable_multithreading`
methods.
methods. :meth:`enable_multithreading` will raise a `ValueError` if called
on a class that does not support multithreading.


Parameters
Expand All @@ -134,7 +162,6 @@ class SyncedCollection(Collection):
"""

registry: DefaultDict[str, List[Any]] = defaultdict(list)
_validators: List[Callable] = []
# Backends that support threading should modify this flag.
_supports_threading: bool = False
_LoadSaveType = _LoadAndSave
Expand All @@ -158,6 +185,30 @@ def __init__(self, parent=None, *args, **kwargs):
if type(self)._supports_threading:
type(self)._locks[self._lock_id] = RLock()

@classmethod
def _register_validators(cls):
"""Register all inherited validators to this class.

This method is called by __init_subclass__ when subclasses are created
to control what validators will be applied to data added to instances of
that class. By default, the ``_all_validators`` class variable defined
on the class itself determines the validation rules for that class. If
that variable is not defined, then all parents of the class are searched,
and a list of validators is constructed by concatenating the ``_validators``
class variable for each parent class that defines it.
"""
# Must explicitly look in cls.__dict__ so that the attribute is not
# inherited from a parent class.
if "_all_validators" not in cls.__dict__:
validators = []
# Classes inherit the validators of their parent classes.
for base_cls in cls.__mro__:
if hasattr(base_cls, "_validators"):
validators.extend(
[v for v in base_cls._validators if v not in validators]
)
cls._all_validators = validators

@classmethod
def __init_subclass__(cls):
"""Register and enable validation in subclasses.
Expand All @@ -174,7 +225,8 @@ def __init_subclass__(cls):
# whether we have a concrete subclass or not.
if not isabstract(cls):
SyncedCollection.registry[cls._backend].append(cls)
cls._validators = []

cls._register_validators()

# Monkey-patch subclasses that support locking.
if cls._supports_threading:
Expand Down Expand Up @@ -220,18 +272,8 @@ def disable_multithreading(cls):

@property
def validators(self):
"""List[Callable]: The validators that will be applied.

Validators are inherited from all parents of a class.
"""
validators = []
# Classes inherit the validators of their parent classes.
for base_cls in type(self).__mro__:
if hasattr(base_cls, "_validators"):
validators.extend(
[v for v in base_cls._validators if v not in validators]
)
return validators
"""List[Callable]: The validators that will be applied."""
vyasr marked this conversation as resolved.
Show resolved Hide resolved
return self._all_validators

@property
def _lock_id(self):
Expand All @@ -242,18 +284,6 @@ def _lock_id(self):
"set of locks."
)

@classmethod
def add_validator(cls, *args):
    r"""Append validators to this class's ``_validators`` list.

    Parameters
    ----------
    \*args : List[Callable]
        Validator(s) to register. Any validator already present in
        ``_validators`` at the time of the call is skipped.

    """
    registered = cls._validators
    # Filter against the list as it stands before anything is appended.
    pending = [candidate for candidate in args if candidate not in registered]
    registered.extend(pending)

@property
@abstractmethod
def _backend(self):
Expand Down