Skip to content

Commit

Permalink
Enable "deep" reconstruction of yaml deserialization mapping constructor (#3093)
Browse files Browse the repository at this point in the history

The yaml mapping constructor in `aiida.orm.utils.serialize` for
`AttributeDicts` was not using the `deep=True` argument to recursively
reconstruct nested dictionaries. Combined with the recursive constructor
of the `AttributeDict` class, this would result in the loss of content
of normal dictionaries nested in an attribute dict in a serialization
round-trip.

Since this serialization is used to store `Process` instances as
checkpoints in the database, from which the daemon reloads the instance
into memory after a restart, this bug resulted in the loss of
information in those checkpoints. For example, if one stored a normal
dictionary in the context of a `WorkChain` instance, which is an
`AttributeDict`, the contents of that nested normal dictionary would not
be restored when the `WorkChain` was reloaded from the checkpoint,
causing all sorts of exceptions in the business logic of the work chain.
  • Loading branch information
sphuber committed Jun 28, 2019
1 parent a62d664 commit f83c67b
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 80 deletions.
26 changes: 24 additions & 2 deletions aiida/backends/tests/common/test_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@
from aiida.orm.utils import serialize
from aiida.backends.testbase import AiidaTestCase

# pylint: disable=missing-docstring


class TestSerialize(AiidaTestCase):
"""Tests for the YAML serializer and deserializer."""

def test_serialize_round_trip(self):
"""
Expand Down Expand Up @@ -101,3 +100,26 @@ def test_serialize_unstored_computer(self):

with self.assertRaises(ValueError):
serialize.serialize(computer)

def test_mixed_attribute_normal_dict(self):
    """Regression test for #3092.

    An `AttributeDict` that holds a nested `AttributeDict`, which in turn holds a plain `dict`, has to survive a
    serialize/deserialize round-trip unchanged. Before the fix the yaml mapping constructor in
    `aiida.orm.utils.serialize` did not reconstruct nested mappings "deeply", so the content of the plain
    dictionary was dropped on deserialization even though it was present in the serialized yaml dump.
    """
    from aiida.common.extendeddicts import AttributeDict

    # Building the outer `AttributeDict` from a dictionary should turn all nested dictionaries into
    # `AttributeDict` instances recursively.
    original = AttributeDict({'nested': AttributeDict({'dict': 'string', 'value': 1})})

    # The plain dictionary is inserted only after construction, so that it stays a normal `dict`.
    original['nested']['normal'] = {'a': 2}

    round_tripped = serialize.deserialize(serialize.serialize(original))

    self.assertEqual(original, round_tripped)
135 changes: 57 additions & 78 deletions aiida/orm/utils/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@
from functools import partial
import yaml

import plumpy
from plumpy import Bundle
from plumpy.utils import AttributesFrozendict

from aiida import common, orm
from aiida import orm
from aiida.common import AttributeDict

_NODE_TAG = '!aiida_node'
_GROUP_TAG = '!aiida_group'
Expand All @@ -35,8 +36,7 @@


def represent_node(dumper, node):
"""
Represent a node in YAML
"""Represent a node in yaml.
:param dumper: the dumper to use
:param node: the node to represent
Expand All @@ -49,8 +49,7 @@ def represent_node(dumper, node):


def node_constructor(loader, node):
"""
Load an aiida node from the yaml representation
"""Load a node from the yaml representation.
:param loader: the yaml loader
:param node: the yaml representation
Expand All @@ -62,8 +61,7 @@ def node_constructor(loader, node):


def represent_group(dumper, group):
"""
Represent a group in YAML
"""Represent a group in yaml.
:param dumper: the dumper to use
:param group: the group to represent
Expand All @@ -76,8 +74,7 @@ def represent_group(dumper, group):


def group_constructor(loader, group):
"""
Load an aiida group from the yaml representation
"""Load a group from the yaml representation.
:param loader: the yaml loader
:param group: the yaml representation
Expand All @@ -89,8 +86,7 @@ def group_constructor(loader, group):


def represent_computer(dumper, computer):
"""
Represent a group in YAML
"""Represent a computer in yaml.
:param dumper: the dumper to use
:param computer: the computer to represent
Expand All @@ -103,8 +99,7 @@ def represent_computer(dumper, computer):


def computer_constructor(loader, computer):
"""
Load an aiida computer from the yaml representation
"""Load a computer from the yaml representation.
:param loader: the yaml loader
:param computer: the yaml representation
Expand All @@ -115,113 +110,98 @@ def computer_constructor(loader, computer):
return orm.Computer.get(uuid=yaml_node)


class AiiDADumper(yaml.Dumper):
"""
Custom AiiDA YAML dumper. Needed so that we don't have to encode each type in the AiiDA graph
hierarchy separately using a custom representer.
"""

def represent_data(self, data):
if isinstance(data, orm.Node):
return represent_node(self, data)
if isinstance(data, orm.Computer):
return represent_computer(self, data)
if isinstance(data, orm.Group):
return represent_group(self, data)

return super(AiiDADumper, self).represent_data(data)


class AiiDALoader(yaml.Loader):
"""AiiDA specific YAML loader"""


def represent_mapping(tag, dumper, mapping):
"""
Represent an AttributeDict in YAML
"""Represent a mapping in yaml.
:param tag: the yaml tag to use
:param tag: the yaml tag to use
:param dumper: the dumper to use
:type dumper: :class:`yaml.dumper.Dumper`
:param mapping: the attribute dict to represent
:param mapping: the mapping to represent
:return: the representation
"""
return dumper.represent_mapping(tag, mapping)


def mapping_constructor(mapping_type, loader, mapping):
"""
Construct an AttributeDict from the representation
"""Construct a mapping from the representation.
:param mapping_type: the class of the mapping to construct, must accept a dictionary as a
sole constructor argument to be compatible
:param mapping_type: the class of the mapping to construct, must accept a dictionary as a sole constructor argument
to be compatible.
:param loader: the yaml loader
:type loader: :class:`yaml.loader.Loader`
:param mapping: the attribute dict representation
:return: the mapping type
:param mapping: the mapping representation
:return: the reconstructed mapping
"""
yaml_node = loader.construct_mapping(mapping)
yaml_node = loader.construct_mapping(mapping, deep=True)
return mapping_type(yaml_node)


# All the mapping types:

yaml.add_representer(
common.extendeddicts.AttributeDict, partial(represent_mapping, _ATTRIBUTE_DICT_TAG), Dumper=AiiDADumper)
yaml.add_constructor(
_ATTRIBUTE_DICT_TAG, partial(mapping_constructor, common.extendeddicts.AttributeDict), Loader=AiiDALoader)

yaml.add_representer(
AttributesFrozendict, partial(represent_mapping, _PLUMPY_ATTRIBUTES_FROZENDICT_TAG), Dumper=AiiDADumper)
yaml.add_constructor(
_PLUMPY_ATTRIBUTES_FROZENDICT_TAG, partial(mapping_constructor, AttributesFrozendict), Loader=AiiDALoader)


def represent_bundle(dumper, bundle):
"""
Represent an AttributeDict in YAML
"""Represent an `plumpy.Bundle` in yaml
:param tag: the yaml tag to use
:param dumper: the dumper to use
:type dumper: :class:`yaml.dumper.Dumper`
:param bundle: the attribute dict to represent
:param bundle: the bundle to represent
:return: the representation
"""
as_dict = dict(bundle)
return dumper.represent_mapping(_PLUMPY_BUNDLE, as_dict)


def bundle_constructor(loader, mapping):
"""
Construct an AttributeDict from the representation
def bundle_constructor(loader, bundle):
"""Construct an `plumpy.Bundle` from the representation
:param mapping: the class of the mapping to construct, must accept a dictionary as a sole constructor argument to
be compatible
:param loader: the yaml loader
:type loader: :class:`yaml.loader.Loader`
:param mapping: the attribute dict representation
:param bundle: the bundle representation
:return: the mapping type
"""
yaml_node = loader.construct_mapping(mapping)
bundle = plumpy.Bundle.__new__(plumpy.Bundle)
yaml_node = loader.construct_mapping(bundle)
bundle = Bundle.__new__(Bundle)
bundle.update(yaml_node)
return bundle


yaml.add_representer(plumpy.Bundle, represent_bundle, Dumper=AiiDADumper)
yaml.add_constructor(_PLUMPY_BUNDLE, bundle_constructor, Loader=AiiDALoader)
class AiiDADumper(yaml.Dumper):
    """Yaml dumper that knows how to represent AiiDA entities.

    Dispatching on the base classes here means that not every type in the AiiDA graph hierarchy needs to have its
    own custom representer registered.
    """

    def represent_data(self, data):
        """Represent `data`, routing AiiDA entities to their dedicated representer.

        :param data: the object to represent
        :return: the yaml representation produced by the matching representer, or by the base dumper when `data` is
            not a node, computer or group
        """
        # The order is kept as node, computer, group; `isinstance` makes subclasses match their base class entry.
        dispatch = (
            (orm.Node, represent_node),
            (orm.Computer, represent_computer),
            (orm.Group, represent_group),
        )

        for entity_class, representer in dispatch:
            if isinstance(data, entity_class):
                return representer(self, data)

        return super(AiiDADumper, self).represent_data(data)


class AiiDALoader(yaml.Loader):
    """AiiDA specific yaml loader.

    The class adds no behaviour of its own: it serves as the loader on which the AiiDA specific constructors of this
    module are registered, and as the loader that `deserialize` passes to `yaml.load`.

    NOTE(review): this derives from the full `yaml.Loader`, which is able to construct arbitrary python objects.
    Presumably it is only ever fed data that AiiDA serialized itself -- confirm before using it on untrusted input.
    """


# Register the custom representers on `AiiDADumper` and the custom constructors on `AiiDALoader`. Each representer
# below writes its mapping under a dedicated tag and the constructor registered for that same tag rebuilds the object.
# `partial` pre-binds the first argument (the tag, or the mapping class to build), leaving the two-argument callable
# that yaml invokes with the dumper/loader and the object/yaml node.
yaml.add_representer(Bundle, represent_bundle, Dumper=AiiDADumper)
yaml.add_representer(AttributeDict, partial(represent_mapping, _ATTRIBUTE_DICT_TAG), Dumper=AiiDADumper)
yaml.add_constructor(_ATTRIBUTE_DICT_TAG, partial(mapping_constructor, AttributeDict), Loader=AiiDALoader)
yaml.add_representer(
AttributesFrozendict, partial(represent_mapping, _PLUMPY_ATTRIBUTES_FROZENDICT_TAG), Dumper=AiiDADumper)
yaml.add_constructor(
_PLUMPY_ATTRIBUTES_FROZENDICT_TAG, partial(mapping_constructor, AttributesFrozendict), Loader=AiiDALoader)
yaml.add_constructor(_PLUMPY_BUNDLE, bundle_constructor, Loader=AiiDALoader)
# Nodes, groups and computers only get a constructor here: on the dumping side they are dispatched through the
# `isinstance` checks in `AiiDADumper.represent_data` instead of through `yaml.add_representer`.
yaml.add_constructor(_NODE_TAG, node_constructor, Loader=AiiDALoader)
yaml.add_constructor(_GROUP_TAG, group_constructor, Loader=AiiDALoader)
yaml.add_constructor(_COMPUTER_TAG, computer_constructor, Loader=AiiDALoader)


def serialize(data, encoding=None):
"""
Serialize the given data structure into a string
"""Serialize the given data structure into a yaml dump.
The function supports standard data containers such as maps and lists as well as AiiDA nodes which will be
serialized into strings, before the whole data structure is dumped into a string using YAML.
serialized into strings, before the whole data structure is dumped into a string using yaml.
:param data: the general data to serialize
:param encoding: optional encoding for the serialized string
Expand All @@ -236,10 +216,9 @@ def serialize(data, encoding=None):


def deserialize(serialized):
"""
Deserialize a string that represents a serialized data structure
"""Deserialize a yaml dump that represents a serialized data structure.
:param serialized: the string representation of serialized data
:param serialized: a yaml serialized string representation
:return: the deserialized data structure
"""
return yaml.load(serialized, Loader=AiiDALoader)

0 comments on commit f83c67b

Please sign in to comment.