-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
dumpers: adds dumpers/loaders feature
* Adds a new feature to dump and load a record. This will be used to, e.g., harmonize access to records loaded via the database, Elasticsearch or third-party systems.
- Loading branch information
Showing
6 changed files
with
216 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of Invenio. | ||
# Copyright (C) 2020 CERN. | ||
# | ||
# Invenio is free software; you can redistribute it and/or modify it | ||
# under the terms of the MIT License; see LICENSE file for more details. | ||
|
||
"""Dumpers used for producing versions of records that can be persisted.""" | ||
|
||
from .elasticsearch import ElasticsearchDumper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of Invenio. | ||
# Copyright (C) 2020 CERN. | ||
# | ||
# Invenio is free software; you can redistribute it and/or modify it | ||
# under the terms of the MIT License; see LICENSE file for more details. | ||
|
||
"""Base class interface for dumpers.""" | ||
|
||
|
||
class Dumper:
    """Interface for dumpers.

    A dumper produces a version of a record that can be persisted
    (``dump``) and rebuilds a record from such a version (``load``).
    Both methods must be overridden by subclasses.
    """

    def dump(self, record):
        """Dump a record.

        :param record: The record to dump.
        :returns: The dumped representation of the record.
        :raises NotImplementedError: If the subclass does not override
            this method.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            '{} must implement dump().'.format(type(self).__name__))

    def load(self, data, record_cls):
        """Load a record.

        :param data: The dumped representation to load from.
        :param record_cls: The record class to instantiate.
        :returns: An instance of ``record_cls``.
        :raises NotImplementedError: If the subclass does not override
            this method.
        """
        raise NotImplementedError(
            '{} must implement load().'.format(type(self).__name__))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of Invenio. | ||
# Copyright (C) 2020 CERN. | ||
# | ||
# Invenio is free software; you can redistribute it and/or modify it | ||
# under the terms of the MIT License; see LICENSE file for more details. | ||
|
||
"""Elasticsearch source dumper. | ||
Dumper used to dump/load an Elasticsearch source document. | ||
""" | ||
|
||
from copy import deepcopy | ||
from uuid import UUID | ||
|
||
import arrow | ||
import pytz | ||
|
||
from .base import Dumper | ||
|
||
|
||
class ElasticsearchDumper(Dumper):
    """Elasticsearch source dumper.

    Dumps a record into a plain dictionary that, next to the record data,
    carries the model-level fields ``@uuid``, ``@revision``, ``created`` and
    ``updated``, and loads such a dictionary back into a record.
    """

    @staticmethod
    def _dump_datetime(value):
        """Serialize a datetime as an ISO 8601 string in UTC.

        :param value: A ``datetime`` or a falsy value (e.g. ``None``).
        :returns: The ISO formatted string, or ``None`` for a falsy value.
        """
        if not value:
            return None
        if value.tzinfo is None:
            # Naive datetimes are interpreted as UTC.
            value = pytz.utc.localize(value)
        else:
            # ``localize`` raises ValueError for aware datetimes, so
            # convert those to UTC instead.
            value = value.astimezone(pytz.utc)
        return value.isoformat()

    @staticmethod
    def _load_datetime(value):
        """Parse a dumped timestamp into a naive UTC datetime.

        :param value: An ISO 8601 string or ``None``.
        :returns: A naive ``datetime`` expressed in UTC, or ``None``.
        """
        if value is None:
            # A record dumped without a model has no timestamps.
            return None
        # Normalise to UTC before dropping the tzinfo so that timestamps
        # carrying a non-UTC offset are not silently shifted.
        return arrow.get(value).to('utc').datetime.replace(tzinfo=None)

    def dump(self, record):
        """Dump a record.

        :param record: The record to dump. Its ``id``, ``revision_id``,
            ``created`` and ``updated`` attributes are read.
        :returns: A deep copy of the record data as a ``dict``, extended
            with the ``@uuid``, ``@revision``, ``created`` and ``updated``
            keys (each ``None`` when the record has no such value).
        """
        # Copy data first, otherwise we modify the record.
        data = deepcopy(dict(record))

        # Dump model-level fields
        data['@uuid'] = str(record.id) if record.id else None
        data['@revision'] = record.revision_id
        data['created'] = self._dump_datetime(record.created)
        data['updated'] = self._dump_datetime(record.updated)

        return data

    def load(self, data, record_cls):
        """Load a record from Elasticsearch.

        :param data: A dictionary as produced by :meth:`dump`. It is not
            modified.
        :param record_cls: The record class to instantiate. Its
            ``model_cls`` attribute is used to build the model when the
            dump carries a ``@uuid``.
        :returns: An instance of ``record_cls``; its model is ``None``
            when the dumped ``@uuid`` is ``None``.
        :raises KeyError: If one of the ``@uuid``, ``@revision``,
            ``created`` or ``updated`` keys is missing from ``data``.
        """
        # Work on a copy: the model-level keys are popped below and the
        # caller's document must stay untouched.
        data = deepcopy(data)

        id_ = data.pop('@uuid')
        revision = data.pop('@revision')
        created = self._load_datetime(data.pop('created'))
        updated = self._load_datetime(data.pop('updated'))

        if id_ is None:
            model = None
        else:
            model = record_cls.model_cls(
                id=UUID(id_),
                data=data,
                created=created,
                updated=updated,
                # SQLAlchemy version counter is 1-based, revision is 0-based
                version_id=revision + 1 if revision is not None else None,
            )

        return record_cls(data, model=model)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of Invenio. | ||
# Copyright (C) 2020 CERN. | ||
# | ||
# Invenio is free software; you can redistribute it and/or modify it | ||
# under the terms of the MIT License; see LICENSE file for more details. | ||
|
||
"""Test the dumpers API.""" | ||
|
||
from datetime import date, datetime | ||
|
||
import pytest | ||
|
||
from invenio_records.api import Record | ||
from invenio_records.dumpers import ElasticsearchDumper | ||
|
||
|
||
@pytest.fixture()
def example_data():
    """Provide the plain record data used by the dumper tests."""
    metadata = dict(title="My record", date="2020-09-20")
    pids = dict(oaiid=dict(value="", provider="local"))
    # "$schema" is intentionally left out of the example data.
    return dict(id="12345-abcde", metadata=metadata, pids=pids)
|
||
|
||
@pytest.fixture()
def es_hit():
    """Example Elasticsearch hit used for tests.

    The ``_source`` document uses the same model-level keys
    (``@uuid``, ``@revision``, ``created``, ``updated``) that
    ``ElasticsearchDumper`` writes and reads.
    """
    return {
        "_index": "testindex",
        "_type": "_doc",
        "_id": "4beb3b3e-a935-442e-a47b-6d386947ea20",
        "_version": 5,
        "_seq_no": 0,
        "_primary_term": 1,
        "found": True,
        "_source": {
            # Key must match the one used by the dumper ("@uuid").
            "@uuid": "4beb3b3e-a935-442e-a47b-6d386947ea20",
            "@revision": 5,
            "created": "2020-09-01T14:26:00+00:00",
            "updated": "2020-09-02T14:28:21.968149+00:00",
            "id": "12345-abcde",
            "metadata": {
                "title": "My record",
                "date": "2020-09-20",
            },
            "pids": {
                "oaiid": {"value": "", "provider": "local"},
            },
        },
    }
|
||
|
||
def test_esdumper_without_model(testapp, db, example_data):
    """Round-trip a record that has no model through the dumper."""
    model_keys = ('@uuid', '@revision', 'created', 'updated')

    # Dumping a model-less record leaves every model-level key as None.
    dumped = Record(example_data).dumps(cls=ElasticsearchDumper())
    assert all(dumped[key] is None for key in model_keys)

    # Loading such a dump yields a record without a model ...
    loaded = Record.loads(dumped, cls=ElasticsearchDumper())
    assert loaded.model is None
    # ... whose data equals the data we started from.
    assert loaded == example_data
|
||
|
||
def test_esdumper_with_model(testapp, db, example_data):
    """Round-trip a persisted record through the Elasticsearch dumper."""
    # Persist a record so that it is backed by a model.
    original = Record.create(example_data)
    db.session.commit()

    # The dump exposes the model-level fields.
    dumped = original.dumps(cls=ElasticsearchDumper())
    assert dumped['@uuid'] == str(original.id)
    assert dumped['@revision'] == original.revision_id
    # Compare timestamps up to the seconds (first 19 characters).
    for field in ('created', 'updated'):
        expected = getattr(original, field).isoformat()
        assert dumped[field][:19] == expected[:19]

    # Loading the dump restores both the data and the model-level fields.
    restored = Record.loads(dumped, cls=ElasticsearchDumper())
    assert restored == original
    assert restored.id == original.id
    assert restored.revision_id == original.revision_id
    assert restored.created == original.created
    assert restored.updated == original.updated
    assert restored.model.json == original.model.json