Skip to content

Commit

Permalink
Implement alembic migrations
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisjsewell committed Feb 25, 2022
1 parent 1149dfb commit 64dd454
Show file tree
Hide file tree
Showing 20 changed files with 375 additions and 224 deletions.
75 changes: 0 additions & 75 deletions aiida/storage/sqlite_zip/migrations/legacy/utils.py

This file was deleted.

22 changes: 21 additions & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v04_to_v05.py
Expand Up @@ -24,7 +24,27 @@
Where id is a SQLA id and migration-name is the name of the particular migration.
"""
# pylint: disable=invalid-name
from .utils import remove_fields, update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def remove_fields(metadata, data, entities, fields):
    """Remove fields under entities from data.json and metadata.json.

    :param metadata: the content of an export archive metadata.json file
    :param data: the content of an export archive data.json file
    :param entities: list of ORM entities
    :param fields: list of fields to be removed from the export archive files
    """
    # data.json: drop each listed field from every stored record of the entity
    for entity_name in entities:
        for record in data['export_data'].get(entity_name, {}).values():
            for field_name in fields:
                record.pop(field_name, None)

    # metadata.json: drop the same fields from the entity's field schema
    for entity_name in entities:
        entity_schema = metadata['all_fields_info'][entity_name]
        for field_name in fields:
            entity_schema.pop(field_name, None)


def migration_drop_node_columns_nodeversion_public(metadata, data):
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v05_to_v06.py
Expand Up @@ -26,7 +26,7 @@
# pylint: disable=invalid-name
from typing import Union

from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def migrate_deserialized_datetime(data, conversion):
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v06_to_v07.py
Expand Up @@ -24,7 +24,7 @@
Where id is a SQLA id and migration-name is the name of the particular migration.
"""
# pylint: disable=invalid-name
from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def data_migration_legacy_process_attributes(data):
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v07_to_v08.py
Expand Up @@ -24,7 +24,7 @@
Where id is a SQLA id and migration-name is the name of the particular migration.
"""
# pylint: disable=invalid-name
from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def migration_default_link_label(data: dict):
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v08_to_v09.py
Expand Up @@ -24,7 +24,7 @@
Where id is a SQLA id and migration-name is the name of the particular migration.
"""
# pylint: disable=invalid-name
from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def migration_dbgroup_type_string(data):
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v09_to_v10.py
Expand Up @@ -9,7 +9,7 @@
###########################################################################
"""Migration from v0.9 to v0.10, used by `verdi export migrate` command."""
# pylint: disable=invalid-name,unused-argument
from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def migrate_v9_to_v10(metadata: dict, data: dict) -> None:
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v10_to_v11.py
Expand Up @@ -11,7 +11,7 @@
This migration applies the name change of the ``Computer`` attribute ``name`` to ``label``.
"""
from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module


def migrate_v10_to_v11(metadata: dict, data: dict) -> None:
Expand Down
2 changes: 1 addition & 1 deletion aiida/storage/sqlite_zip/migrations/legacy/v11_to_v12.py
Expand Up @@ -11,7 +11,7 @@
This migration is necessary after the `core.` prefix was added to entry points shipped with `aiida-core`.
"""
from .utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module
from ..utils import update_metadata, verify_metadata_version # pylint: disable=no-name-in-module

MAPPING_DATA = {
'data.array.ArrayData.': 'data.core.array.ArrayData.',
Expand Down
107 changes: 50 additions & 57 deletions aiida/storage/sqlite_zip/migrations/legacy_to_main.py
Expand Up @@ -11,7 +11,6 @@
from contextlib import contextmanager
from datetime import datetime
from hashlib import sha256
import json
from pathlib import Path, PurePosixPath
import shutil
import tarfile
Expand All @@ -28,8 +27,8 @@
from aiida.storage.log import MIGRATE_LOGGER

from . import v1_db_schema as v1_schema
from ..utils import create_sqla_engine
from .legacy.utils import update_metadata
from ..utils import DB_FILENAME, REPO_FOLDER, create_sqla_engine
from .utils import update_metadata

_NODE_ENTITY_NAME = 'Node'
_GROUP_ENTITY_NAME = 'Group'
Expand Down Expand Up @@ -65,16 +64,18 @@
_LOG_ENTITY_NAME: v1_schema.DbLog,
}

_META_FILENAME = 'metadata.json'
_DB_FILENAME = 'db.sqlite3'
_REPO_FOLDER = 'repo'

MIGRATED_TO_REVISION = 'main_0001'
LEGACY_TO_MAIN_REVISION = 'main_0000'


def perform_v1_migration( # pylint: disable=too-many-locals
inpath: Path, working: Path, archive_name: str, is_tar: bool, metadata: dict, data: dict, compression: int
) -> None:
inpath: Path,
working: Path,
new_zip: ZipPath,
central_dir: Dict[str, Any],
is_tar: bool,
metadata: dict,
data: dict,
) -> Path:
"""Perform the repository and JSON to SQLite migration.
1. Iterate though the repository paths in the archive
Expand All @@ -84,10 +85,11 @@ def perform_v1_migration( # pylint: disable=too-many-locals
:param inpath: the input path to the old archive
:param metadata: the metadata to migrate
:param data: the data to migrate
:returns:the path to the sqlite database file
"""
MIGRATE_LOGGER.report('Initialising new archive...')
node_repos: Dict[str, List[Tuple[str, Optional[str]]]] = {}
central_dir: Dict[str, Any] = {}
if is_tar:
# we cannot stream from a tar file performantly, so we extract it to disk first
@contextmanager
Expand All @@ -101,53 +103,44 @@ def in_archive_context(_inpath):
shutil.rmtree(temp_folder)
else:
in_archive_context = ZipPath # type: ignore
with ZipPath(
working / archive_name,
mode='w',
compresslevel=compression,
name_to_info=central_dir,
info_order=(_META_FILENAME, _DB_FILENAME)
) as new_path:
with in_archive_context(inpath) as path:
length = sum(1 for _ in path.glob('**/*'))
base_parts = len(path.parts)
with get_progress_reporter()(desc='Converting repo', total=length) as progress:
for subpath in path.glob('**/*'):
progress.update()
parts = subpath.parts[base_parts:]
# repository file are stored in the legacy archive as `nodes/uuid[0:2]/uuid[2:4]/uuid[4:]/path/...`
if len(parts) < 6 or parts[0] != 'nodes' or parts[4] not in ('raw_input', 'path'):
continue
uuid = ''.join(parts[1:4])
posix_rel = PurePosixPath(*parts[5:])
hashkey = None
if subpath.is_file():

with in_archive_context(inpath) as path:
length = sum(1 for _ in path.glob('**/*'))
base_parts = len(path.parts)
with get_progress_reporter()(desc='Converting repo', total=length) as progress:
for subpath in path.glob('**/*'):
progress.update()
parts = subpath.parts[base_parts:]
# repository file are stored in the legacy archive as `nodes/uuid[0:2]/uuid[2:4]/uuid[4:]/path/...`
if len(parts) < 6 or parts[0] != 'nodes' or parts[4] not in ('raw_input', 'path'):
continue
uuid = ''.join(parts[1:4])
posix_rel = PurePosixPath(*parts[5:])
hashkey = None
if subpath.is_file():
with subpath.open('rb') as handle:
hashkey = chunked_file_hash(handle, sha256)
if f'{REPO_FOLDER}/{hashkey}' not in central_dir:
with subpath.open('rb') as handle:
hashkey = chunked_file_hash(handle, sha256)
if f'{_REPO_FOLDER}/{hashkey}' not in central_dir:
with subpath.open('rb') as handle:
with (new_path / f'{_REPO_FOLDER}/{hashkey}').open(mode='wb') as handle2:
shutil.copyfileobj(handle, handle2)
node_repos.setdefault(uuid, []).append((posix_rel.as_posix(), hashkey))
MIGRATE_LOGGER.report(f'Unique files written: {len(central_dir)}')

_json_to_sqlite(working / _DB_FILENAME, data, node_repos)

MIGRATE_LOGGER.report('Finalising archive')
with (working / _DB_FILENAME).open('rb') as handle:
with (new_path / _DB_FILENAME).open(mode='wb') as handle2:
shutil.copyfileobj(handle, handle2)

# remove legacy keys from metadata and store
metadata.pop('unique_identifiers', None)
metadata.pop('all_fields_info', None)
# remove legacy key nesting
metadata['creation_parameters'] = metadata.pop('export_parameters', {})
metadata['compression'] = compression
metadata['key_format'] = 'sha256'
metadata['mtime'] = datetime.now().isoformat()
update_metadata(metadata, MIGRATED_TO_REVISION)
(new_path / _META_FILENAME).write_text(json.dumps(metadata))
with (new_zip / f'{REPO_FOLDER}/{hashkey}').open(mode='wb') as handle2:
shutil.copyfileobj(handle, handle2)
node_repos.setdefault(uuid, []).append((posix_rel.as_posix(), hashkey))
MIGRATE_LOGGER.report(f'Unique files written: {len(central_dir)}')

# convert the JSON database to SQLite
_json_to_sqlite(working / DB_FILENAME, data, node_repos)

# remove legacy keys from metadata and store
metadata.pop('unique_identifiers', None)
metadata.pop('all_fields_info', None)
# remove legacy key nesting
metadata['creation_parameters'] = metadata.pop('export_parameters', {})
metadata['key_format'] = 'sha256'

# update the version in the metadata
update_metadata(metadata, LEGACY_TO_MAIN_REVISION)

return working / DB_FILENAME


def _json_to_sqlite(
Expand Down
2 changes: 2 additions & 0 deletions aiida/storage/sqlite_zip/migrations/script.py.mako
Expand Up @@ -17,8 +17,10 @@ depends_on = ${repr(depends_on)}


def upgrade():
"""Migrations for the upgrade."""
${upgrades if upgrades else "pass"}


def downgrade():
"""Migrations for the downgrade."""
${downgrades if downgrades else "pass"}
44 changes: 44 additions & 0 deletions aiida/storage/sqlite_zip/migrations/utils.py
Expand Up @@ -16,9 +16,53 @@

from archive_path import TarPath, ZipPath

from aiida.common import exceptions
from aiida.common.progress_reporter import create_callback, get_progress_reporter


def update_metadata(metadata, version):
    """Update the metadata with a new version number and a notification of the conversion that was executed.

    :param metadata: the content of an export archive metadata.json file
    :param version: string version number that the updated metadata should get
    """
    from aiida import get_version

    aiida_version = get_version()
    previous_version = metadata['export_version']

    # Append a human-readable record of this conversion to the running history
    history = metadata.get('conversion_info', [])
    history.append(f'Converted from version {previous_version} to {version} with AiiDA v{aiida_version}')

    metadata['aiida_version'] = aiida_version
    metadata['export_version'] = version
    metadata['conversion_info'] = history


def verify_metadata_version(metadata, version=None):
    """Utility function to verify that the metadata has the correct version number.

    If no version number is passed, it will just extract the version number and return it.

    :param metadata: the content of an export archive metadata.json file
    :param version: string version number that the metadata is expected to have
    :return: the extracted version string when ``version`` is ``None``, otherwise ``None``
    :raises: ``StorageMigrationError`` if the 'export_version' key is missing or the versions do not match
    """
    try:
        metadata_version = metadata['export_version']
    except KeyError as exception:
        # Chain the original KeyError so the traceback shows the true cause (avoids B904)
        raise exceptions.StorageMigrationError("metadata is missing the 'export_version' key") from exception

    if version is None:
        return metadata_version

    if metadata_version != version:
        raise exceptions.StorageMigrationError(
            f'expected archive file with version {version} but found version {metadata_version}'
        )

    return None


def copy_zip_to_zip(
inpath: Path,
outpath: Path,
Expand Down

0 comments on commit 64dd454

Please sign in to comment.