Skip to content

Commit

Permalink
dumpers: adds dumpers/loaders feature
Browse files Browse the repository at this point in the history
* Adds a new feature to dump and load a record. This will be used,
  e.g., to harmonize access to records loaded via the database,
  Elasticsearch or third-party systems.
  • Loading branch information
lnielsen committed Sep 3, 2020
1 parent e4827fd commit 282b33f
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 2 deletions.
29 changes: 27 additions & 2 deletions invenio_records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,30 @@ def replace_refs(self):
"""Replace the ``$ref`` keys within the JSON."""
return _records_state.replace_refs(self)

def dumps(self, **kwargs):
"""Return pure Python dictionary with record metadata."""
def dumps(self, cls=None):
    """Serialize the record into a plain ``dict``.

    The result is a version of the record suitable for persisting on
    storage such as the database, Elasticsearch or other mediums; the
    exact shape depends on the dumper that is supplied.

    :param cls: Dumper used to produce the dump (any object exposing
        ``dump(record)``). Without one, a deep copy of the record's data
        is returned.
    :returns: A ``dict``.
    """
    if not cls:
        # No dumper supplied: hand back a detached deep copy of the data.
        return deepcopy(dict(self))
    return cls.dump(self)

@classmethod
def loads(record_cls, data, cls=None):
    """Load a record dump.

    Counterpart of ``dumps``: turns a previously produced dump back into
    a record instance.

    :param data: The dump to load.
    :param cls: Loader used to rebuild the record (any object exposing
        ``load(data, record_cls)``). Without one, the dump is deep-copied
        and passed directly to the record class.
    :returns: A new :class:`Record` instance.
    """
    # The method is named in plural to align with dumps.
    if cls is None:
        # ``cls`` is optional in the signature, so it must not be
        # dereferenced blindly. Mirror the default of ``dumps`` (a plain
        # deep copy) instead of failing with an AttributeError.
        return record_cls(deepcopy(data))
    return cls.load(data, record_cls)


class Record(RecordBase):
"""Define API for metadata creation and manipulation."""
Expand Down Expand Up @@ -355,6 +375,8 @@ def revert(self, revision_id):

with db.session.begin_nested():
if self.send_signals:
# TODO: arguments to this signal do not make sense.
# They ought to be both record and revision.
before_record_revert.send(
current_app._get_current_object(),
record=self
Expand All @@ -367,6 +389,9 @@ def revert(self, revision_id):
db.session.merge(self.model)

if self.send_signals:
# TODO: arguments to this signal do not make sense.
# Ought to be the class being returned just below and should
# include the revision.
after_record_revert.send(
current_app._get_current_object(),
record=self
Expand Down
11 changes: 11 additions & 0 deletions invenio_records/dumpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2020 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Dumpers used for producing versions of records that can be persisted."""

from .elasticsearch import ElasticsearchDumper
21 changes: 21 additions & 0 deletions invenio_records/dumpers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2020 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Base class interface for dumpers."""


class Dumper:
    """Contract that every dumper implements.

    A dumper converts a record into a persistable representation
    (``dump``) and rebuilds a record from that representation (``load``).
    Both methods are placeholders here and must be overridden.
    """

    def dump(self, record):
        """Produce a dump of ``record``; subclasses provide the logic."""
        raise NotImplementedError

    def load(self, data, record_cls):
        """Rebuild a ``record_cls`` record from ``data``; see subclasses."""
        raise NotImplementedError
61 changes: 61 additions & 0 deletions invenio_records/dumpers/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2020 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Elasticsearch source dumper.
Dumper used to dump/load an Elasticsearch source document.
"""

from copy import deepcopy
from uuid import UUID

import arrow
import pytz

from .base import Dumper


class ElasticsearchDumper(Dumper):
    """Elasticsearch source dumper.

    Dumps a record into a single document that carries the model-level
    fields (``@uuid``, ``@revision``, ``created``, ``updated``) next to the
    record data, and rebuilds a record from such a document.
    """

    @staticmethod
    def _dump_datetime(value):
        """Serialize a naive datetime as a UTC ISO 8601 string (or None)."""
        return pytz.utc.localize(value).isoformat() if value else None

    @staticmethod
    def _load_datetime(value):
        """Parse an ISO 8601 string into a naive datetime (or None)."""
        # A dump made without a model carries ``None`` timestamps; keep them
        # as ``None`` instead of handing ``None`` to the date parser.
        if value is None:
            return None
        return arrow.get(value).datetime.replace(tzinfo=None)

    def dump(self, record):
        """Dump a record.

        :param record: The record to dump.
        :returns: A ``dict`` with a deep copy of the record data plus the
            ``@uuid``, ``@revision``, ``created`` and ``updated`` keys
            (each ``None`` when the record has no value for it).
        """
        # Copy data first, otherwise we modify the record.
        data = deepcopy(dict(record))

        # Dump model-level fields
        data['@uuid'] = str(record.id) if record.id else None
        data['@revision'] = record.revision_id
        data['created'] = self._dump_datetime(record.created)
        data['updated'] = self._dump_datetime(record.updated)

        return data

    def load(self, data, record_cls):
        """Load a record from Elasticsearch.

        :param data: A document as produced by :meth:`dump`. It is not
            modified.
        :param record_cls: Record class to instantiate; its ``model_cls``
            is used to build the model when the dump has a ``@uuid``.
        :returns: A ``record_cls`` instance; its model is ``None`` when the
            dump's ``@uuid`` is ``None``.
        :raises KeyError: If one of the model-level keys is missing.
        """
        # Copy data first, otherwise popping the model-level keys below
        # would modify the caller's document.
        data = deepcopy(data)

        id_ = data.pop('@uuid')
        revision = data.pop('@revision')
        created = data.pop('created')
        updated = data.pop('updated')

        if id_ is None:
            model = None
        else:
            model = record_cls.model_cls(
                id=UUID(id_),
                data=data,
                created=self._load_datetime(created),
                updated=self._load_datetime(updated),
                # SQLAlchemy version counter is 1-based, revision is 0-based
                version_id=revision + 1,
            )

        return record_cls(data, model=model)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
]

install_requires = [
'arrow>=0.16.0',
'invenio-base>=1.2.0',
'invenio-celery>=1.2.0',
'invenio-i18n>=1.2.0',
Expand Down
95 changes: 95 additions & 0 deletions tests/test_api_dumpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2020 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Test the dumpers API."""

from datetime import date, datetime

import pytest

from invenio_records.api import Record
from invenio_records.dumpers import ElasticsearchDumper


@pytest.fixture()
def example_data():
    """Provide the sample record data shared by the dumper tests."""
    metadata = {"title": "My record", "date": "2020-09-20"}
    pids = {"oaiid": {"value": "", "provider": "local"}}
    return {
        # "$schema": "",
        "id": "12345-abcde",
        "metadata": metadata,
        "pids": pids,
    }


@pytest.fixture()
def es_hit():
    """Example Elasticsearch hit whose ``_source`` is a dumped record."""
    return {
        "_index": "testindex",
        "_type": "_doc",
        "_id": "4beb3b3e-a935-442e-a47b-6d386947ea20",
        "_version": 5,
        "_seq_no": 0,
        "_primary_term": 1,
        "found": True,
        "_source": {
            # Model-level keys as written by ElasticsearchDumper.dump().
            "@uuid": "4beb3b3e-a935-442e-a47b-6d386947ea20",
            "@revision": 5,
            "created": "2020-09-01T14:26:00+00:00",
            "updated": "2020-09-02T14:28:21.968149+00:00",
            "id": "12345-abcde",
            "metadata": {
                "title": "My record",
                "date": "2020-09-20",
            },
            "pids": {
                "oaiid": {"value": "", "provider": "local"},
            },
        }
    }


def test_esdumper_without_model(testapp, db, example_data):
    """Round-trip a record that has no database model behind it."""
    dumper = ElasticsearchDumper()

    # Without a model every model-level key is dumped as None.
    dumped = Record(example_data).dumps(cls=dumper)
    model_keys = ('@uuid', '@revision', 'created', 'updated')
    for key in model_keys:
        assert dumped[key] is None

    # Loading such a dump yields a model-less record with the same data.
    loaded = Record.loads(dumped, cls=dumper)
    assert loaded.model is None
    assert loaded == example_data


def test_esdumper_with_model(testapp, db, example_data):
    """Round-trip a record that is persisted in the database."""
    dumper = ElasticsearchDumper()

    # Persist a record so that it has a model.
    record = Record.create(example_data)
    db.session.commit()

    # The dump exposes the model-level fields.
    dump = record.dumps(cls=dumper)
    assert dump['@uuid'] == str(record.id)
    assert dump['@revision'] == record.revision_id
    # Compare timestamps at second precision (first 19 characters).
    for field in ('created', 'updated'):
        assert dump[field][:19] == getattr(record, field).isoformat()[:19]

    # Loading the dump restores both the data and the model-level fields.
    new_record = Record.loads(dump, cls=dumper)
    assert new_record == record
    for attr in ('id', 'revision_id', 'created', 'updated'):
        assert getattr(new_record, attr) == getattr(record, attr)
    assert new_record.model.json == record.model.json

0 comments on commit 282b33f

Please sign in to comment.