Commit 94f0439

sqlite_zip speedup
danielhollas committed May 8, 2024
1 parent d7d934c commit 94f0439
Showing 3 changed files with 25 additions and 14 deletions.
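
This commit defers imports: archive_path, the local migrator module, tarfile, and the
legacy v1_db_schema are now imported inside the functions that use them rather than at
module level, so importing the sqlite_zip storage modules no longer pays for those
dependencies up front. A rough way to check the effect is python -X importtime, or a
sketch like the one below (assumes an environment with aiida-core installed; the module
path comes from the diff, the timing is illustrative and not part of the commit).

    # Sketch: time how long importing the backend module takes in this environment.
    import time

    start = time.perf_counter()
    import aiida.storage.sqlite_zip.backend  # noqa: E402,F401

    print(f'import took {time.perf_counter() - start:.3f} s')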
src/aiida/storage/sqlite_zip/backend.py (10 additions, 2 deletions)

@@ -20,7 +20,6 @@
 from typing import BinaryIO, Iterable, Iterator, Optional, Sequence, Tuple, cast
 from zipfile import ZipFile, is_zipfile

-from archive_path import ZipPath, extract_file_in_zip
 from pydantic import BaseModel, Field, field_validator
 from sqlalchemy.orm import Session

@@ -33,7 +32,6 @@
 from aiida.repository.backend.abstract import AbstractRepositoryBackend

 from . import orm
-from .migrator import get_schema_version_head, migrate, validate_storage
 from .utils import (
     DB_FILENAME,
     META_FILENAME,
@@ -83,6 +81,8 @@ def filepath_exists_and_is_absolute(cls, value: str) -> str:

     @classmethod
     def version_head(cls) -> str:
+        from .migrator import get_schema_version_head
+
         return get_schema_version_head()

     @staticmethod
@@ -111,9 +111,13 @@ def initialise(cls, profile: 'Profile', reset: bool = False) -> bool:
         tests having run.
         :returns: ``True`` if the storage was initialised by the function call, ``False`` if it was already initialised.
         """
+        from archive_path import ZipPath
+
         filepath_archive = Path(profile.storage_config['filepath'])

         if filepath_archive.exists() and not reset:
+            from .migrator import migrate
+
             # The archive exists but ``reset == False``, so we try to migrate to the latest schema version. If the
             # migration works, we replace the original archive with the migrated one.
             with tempfile.TemporaryDirectory() as dirpath:
@@ -162,6 +166,8 @@ def migrate(cls, profile: Profile):
         raise NotImplementedError('use the :func:`aiida.storage.sqlite_zip.migrator.migrate` function directly.')

     def __init__(self, profile: Profile):
+        from .migrator import validate_storage
+
         super().__init__(profile)
         self._path = Path(profile.storage_config['filepath'])
         validate_storage(self._path)
@@ -194,6 +200,8 @@ def close(self):

     def get_session(self) -> Session:
         """Return an SQLAlchemy session."""
+        from archive_path import extract_file_in_zip
+
         if self._closed:
             raise ClosedStorage(str(self))
         if self._session is None:
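
The pattern in this file is always the same: the import moves from module scope into the
method that first needs it, so the cost is paid on the first call rather than when the
backend module is imported, and later calls hit the sys.modules cache. A minimal,
self-contained sketch of the idea, with a stdlib module standing in for the deferred
dependencies (not AiiDA code):

    import sys
    import time


    def first_use() -> None:
        """The dependency is resolved only when this function actually runs."""
        import zipfile  # stand-in for archive_path / .migrator

        print('zipfile loaded:', hasattr(zipfile, 'ZipFile'))


    if __name__ == '__main__':
        print('imported before first call?', 'zipfile' in sys.modules)
        start = time.perf_counter()
        first_use()
        print(f'first call (pays the import): {time.perf_counter() - start:.6f} s')
        start = time.perf_counter()
        first_use()
        print(f'second call (sys.modules cache): {time.perf_counter() - start:.6f} s')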
src/aiida/storage/sqlite_zip/migrations/legacy_to_main.py (11 additions, 10 deletions)

@@ -27,7 +27,6 @@
 from aiida.storage.log import MIGRATE_LOGGER

 from ..utils import DB_FILENAME, REPO_FOLDER, create_sqla_engine
-from . import v1_db_schema as v1_schema
 from .utils import update_metadata

 _NODE_ENTITY_NAME = 'Node'
@@ -45,15 +44,6 @@
     _COMMENT_ENTITY_NAME: {'dbnode': 'dbnode_id', 'user': 'user_id'},
 }

-aiida_orm_to_backend = {
-    _USER_ENTITY_NAME: v1_schema.DbUser,
-    _GROUP_ENTITY_NAME: v1_schema.DbGroup,
-    _NODE_ENTITY_NAME: v1_schema.DbNode,
-    _COMMENT_ENTITY_NAME: v1_schema.DbComment,
-    _COMPUTER_ENTITY_NAME: v1_schema.DbComputer,
-    _LOG_ENTITY_NAME: v1_schema.DbLog,
-}
-
 LEGACY_TO_MAIN_REVISION = 'main_0000'


@@ -138,6 +128,17 @@ def _json_to_sqlite(
     """Convert a JSON archive format to SQLite."""
     from aiida.tools.archive.common import batch_iter

+    from . import v1_db_schema as v1_schema
+
+    aiida_orm_to_backend = {
+        _USER_ENTITY_NAME: v1_schema.DbUser,
+        _GROUP_ENTITY_NAME: v1_schema.DbGroup,
+        _NODE_ENTITY_NAME: v1_schema.DbNode,
+        _COMMENT_ENTITY_NAME: v1_schema.DbComment,
+        _COMPUTER_ENTITY_NAME: v1_schema.DbComputer,
+        _LOG_ENTITY_NAME: v1_schema.DbLog,
+    }
+
     MIGRATE_LOGGER.report('Converting DB to SQLite')

     engine = create_sqla_engine(outpath)
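
Here the refactor goes one step further: the aiida_orm_to_backend mapping is now built
inside _json_to_sqlite, the only function that uses it, because keeping it at module
scope would force the v1_db_schema import whenever the migrations module is imported.
A toy version of the same move, with a stdlib module standing in for the legacy schema
(hypothetical names, not the AiiDA API):

    def orm_to_backend(entity_name: str):
        """Build the entity -> model mapping on first use rather than at import time."""
        # Stand-in for `from . import v1_db_schema as v1_schema`.
        import xml.dom.minidom as heavy_schema

        mapping = {
            'Document': heavy_schema.Document,
            'Element': heavy_schema.Element,
        }
        return mapping[entity_name]


    print(orm_to_backend('Document'))

Rebuilding the small mapping on every call adds a negligible per-call cost compared with
the database work the migration function does.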
src/aiida/storage/sqlite_zip/utils.py (4 additions, 2 deletions)

@@ -9,12 +9,10 @@
 """Utilities for this backend."""

 import json
-import tarfile
 import zipfile
 from pathlib import Path
 from typing import Any, Dict, Optional, Union

-from archive_path import read_file_in_tar, read_file_in_zip
 from sqlalchemy import event
 from sqlalchemy.future.engine import Engine, create_engine

@@ -64,6 +62,10 @@ def extract_metadata(path: Union[str, Path], *, search_limit: Optional[int] = 10
     :param search_limit: the maximum number of records to search for the metadata file in a zip file.
     """
+    import tarfile
+
+    from archive_path import read_file_in_tar, read_file_in_zip
+
     path = Path(path)
     if not path.exists():
         raise UnreachableStorage(f'path not found: {path}')
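
Even the stdlib tarfile import is deferred here. Because Python caches imported modules
in sys.modules, only the first call to extract_metadata pays the real import cost; every
later call is roughly a dictionary lookup. A quick, illustrative check (not part of the
commit):

    import timeit


    def deferred() -> None:
        import tarfile  # noqa: F401  first call imports, later calls hit sys.modules


    # Per-call overhead once the module is cached: on the order of a dict lookup.
    print(timeit.timeit(deferred, number=100_000))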
