dumpers: adds dumpers/loaders feature

* Adds a new feature to dump and load a record. This will be used to, e.g., harmonize access to records loaded via the database, Elasticsearch or third-party systems.
inveniosoftware · Sep 3, 2020 · 282b33f · 282b33f
1 parent e4827fd
commit 282b33f
Show file tree

Hide file tree

Showing 6 changed files with 216 additions and 2 deletions.
diff --git a/invenio_records/api.py b/invenio_records/api.py
@@ -134,10 +134,30 @@ def replace_refs(self):
         """Replace the ``$ref`` keys within the JSON."""
         return _records_state.replace_refs(self)
 
-    def dumps(self, **kwargs):
-        """Return pure Python dictionary with record metadata."""
+    def dumps(self, cls=None):
+        """Make a dump of the record (defaults to a deep copy of the dict).
+
+        This method produces a version of a record that can be persisted on
+        storage such as the database, Elasticsearch or other mediums depending
+        on the dumper class used.
+
+        :param cls: Dumper class to use when dumping the record.
+        :returns: A ``dict``.
+        """
+        if cls:
+            return cls.dump(self)
         return deepcopy(dict(self))
 
+    @classmethod
+    def loads(record_cls, data, cls=None):
+        """Load a record dump.
+
+        :param cls: Loader class to use when loading the record.
+        :returns: A new :class:`Record` instance.
+        """
+        # The method is named in the plural to align with dumps.
+        return cls.load(data, record_cls)
+
 
 class Record(RecordBase):
     """Define API for metadata creation and manipulation."""
@@ -355,6 +375,8 @@ def revert(self, revision_id):
 
         with db.session.begin_nested():
             if self.send_signals:
+                # TODO: arguments to this signal do not make sense.
+                # They ought to be both record and revision.
                 before_record_revert.send(
                     current_app._get_current_object(),
                     record=self
@@ -367,6 +389,9 @@ def revert(self, revision_id):
             db.session.merge(self.model)
 
         if self.send_signals:
+            # TODO: arguments to this signal do not make sense.
+            # It ought to be the class being returned just below and should
+            # include the revision.
             after_record_revert.send(
                 current_app._get_current_object(),
                 record=self

diff --git a/invenio_records/dumpers/__init__.py b/invenio_records/dumpers/__init__.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Invenio.
+# Copyright (C) 2020 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Dumpers used for producing versions of records that can be persisted."""
+
+from .elasticsearch import ElasticsearchDumper
diff --git a/invenio_records/dumpers/base.py b/invenio_records/dumpers/base.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Invenio.
+# Copyright (C) 2020 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Base class interface for dumpers."""
+
+
+class Dumper:
+    """Interface for dumpers."""
+
+    def dump(self, record):
+        """Dump a record."""
+        raise NotImplementedError()
+
+    def load(self, data, record_cls):
+        """Load a record."""
+        raise NotImplementedError()
diff --git a/invenio_records/dumpers/elasticsearch.py b/invenio_records/dumpers/elasticsearch.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Invenio.
+# Copyright (C) 2020 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Elasticsearch source dumper.
+
+Dumper used to dump/load an Elasticsearch source document.
+"""
+
+from copy import deepcopy
+from uuid import UUID
+
+import arrow
+import pytz
+
+from .base import Dumper
+
+
+class ElasticsearchDumper(Dumper):
+    """Elasticsearch source dumper."""
+
+    def dump(self, record):
+        """Dump a record."""
+        # Copy data first, otherwise we modify the record.
+        data = deepcopy(dict(record))
+
+        # Dump model-level fields
+        data['@uuid'] = str(record.id) if record.id else None
+        data['@revision'] = \
+            record.revision_id if record.revision_id is not None else None
+        data['created'] = pytz.utc.localize(record.created).isoformat() \
+            if record.created else None
+        data['updated'] = pytz.utc.localize(record.updated).isoformat() \
+            if record.updated else None
+
+        return data
+
+    def load(self, data, record_cls):
+        """Load a record from Elasticsearch."""
+        id_ = data.pop('@uuid')
+        revision = data.pop('@revision')
+        created = arrow.get(data.pop('created')).datetime.replace(tzinfo=None)
+        updated = arrow.get(data.pop('updated')).datetime.replace(tzinfo=None)
+
+        if id_ is None:
+            model = None
+        else:
+            model = record_cls.model_cls(
+                id=UUID(id_),
+                data=data,
+                created=created,
+                updated=updated,
+                # SQLAlchemy version counter is 1-based, revision is 0-based
+                version_id=revision + 1,
+            )
+
+        return record_cls(data, model=model)
diff --git a/setup.py b/setup.py
@@ -51,6 +51,7 @@
 ]
 
 install_requires = [
+    'arrow>=0.16.0',
     'invenio-base>=1.2.0',
     'invenio-celery>=1.2.0',
     'invenio-i18n>=1.2.0',

diff --git a/tests/test_api_dumpers.py b/tests/test_api_dumpers.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Invenio.
+# Copyright (C) 2020 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Test the dumpers API."""
+
+from datetime import date, datetime
+
+import pytest
+
+from invenio_records.api import Record
+from invenio_records.dumpers import ElasticsearchDumper
+
+
+@pytest.fixture()
+def example_data():
+    """Example record used for tests."""
+    return {
+        # "$schema": "",
+        "id": "12345-abcde",
+        "metadata": {
+            "title": "My record",
+            "date": "2020-09-20",
+        },
+        "pids": {
+            "oaiid": {"value": "", "provider": "local"},
+        },
+    }
+
+
+@pytest.fixture()
+def es_hit():
+    """Example Elasticsearch hit used for tests."""
+    return {
+        "_index": "testindex",
+        "_type": "_doc",
+        "_id": "4beb3b3e-a935-442e-a47b-6d386947ea20",
+        "_version": 5,
+        "_seq_no": 0,
+        "_primary_term": 1,
+        "found": True,
+        "_source": {
+            "@id": "4beb3b3e-a935-442e-a47b-6d386947ea20",
+            "@revision": 5,
+            "created": "2020-09-01T14:26:00+00:00",
+            "updated": "2020-09-02T14:28:21.968149+00:00'",
+            "id": "12345-abcde",
+            "metadata": {
+                "title": "My record",
+                "date": "2020-09-20",
+            },
+            "pids": {
+                "oaiid": {"value": "", "provider": "local"},
+            },
+        }
+    }
+
+
+def test_esdumper_without_model(testapp, db, example_data):
+    """Test the Elasticsearch dumper."""
+    # Dump without a model.
+    dump = Record(example_data).dumps(cls=ElasticsearchDumper())
+    for k in ['@uuid', '@revision', 'created', 'updated']:
+        assert dump[k] is None  # keys are set to None without a model
+    # Load without a model defined
+    record = Record.loads(dump, cls=ElasticsearchDumper())
+    assert record.model is None  # model will not be set
+    assert record == example_data  # data is equivalent to initial data
+
+
+def test_esdumper_with_model(testapp, db, example_data):
+    """Test the Elasticsearch dumper."""
+    # Create a record
+    record = Record.create(example_data)
+    db.session.commit()
+
+    # Dump it
+    dump = record.dumps(cls=ElasticsearchDumper())
+    assert dump['@uuid'] == str(record.id)
+    assert dump['@revision'] == record.revision_id
+    assert dump['created'][:19] == record.created.isoformat()[:19]
+    assert dump['updated'][:19] == record.updated.isoformat()[:19]
+
+    # Load it
+    new_record = Record.loads(dump, cls=ElasticsearchDumper())
+    assert new_record == record
+    assert new_record.id == record.id
+    assert new_record.revision_id == record.revision_id
+    assert new_record.created == record.created
+    assert new_record.updated == record.updated
+    assert new_record.model.json == record.model.json