From bff117ade95bf237b5fdeb8dc57530c945aaa036 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 7 May 2021 10:45:18 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20`aiida/repository`=20typi?= =?UTF-8?q?ng=20(#4920)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was a subtle bug in `_insert_file`, where in the else clause of the conditional `self.get_directory` was missing the parentheses to call the method. This didn't show up in the tests because the code path was actually not reachable since the conditional is always True. The original intention was to get the parent directory of the file to be created, and only create it if it wasn't already created. However, this behavior is anyway guaranteed by `create_directory` so this can be safely called in all instances and we can get rid of the entire conditional. In certain methods, such as `open`, an explicit assert had to be placed to check that the `File` instance returned by `get_file` had a `key` that is not `None`, or `mypy` would complain. The reason that this is an assert is because a `File` of type `FileType.FILE` always has `key` that is not `None`, as is guaranteed by its constructor and the `get_file` method will check that the path does correspond to a file and not a directory, in which case it would raise `IsADirectory`. However, `mypy` cannot introspect this polymorphism and so the explicit check is needed. Since this would constitute an internal coding error if the check fails, we use an assert. Finally, the design of the `put_object_from_filelike` method in the abstract class that performed the validation such that subclasses would not have to, also didn't please the `mypy` gods. So instead, the validation implementation of this method is moved to the protected method `_put_object_from_filelike` and the base class performs the validation and then calls this method. --- .pre-commit-config.yaml | 1 + aiida/backends/general/migrations/utils.py | 2 +- aiida/orm/nodes/repository.py | 2 +- aiida/repository/__init__.py | 2 +- aiida/repository/backend/__init__.py | 2 +- aiida/repository/backend/abstract.py | 14 ++-- aiida/repository/backend/disk_object_store.py | 12 ++-- aiida/repository/backend/sandbox.py | 14 ++-- aiida/repository/repository.py | 70 +++++++++---------- tests/repository/backend/test_abstract.py | 3 + .../backend/test_disk_object_store.py | 7 ++ tests/repository/backend/test_sandbox.py | 7 ++ 12 files changed, 80 insertions(+), 56 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4204ac27f..bc733386ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,6 +48,7 @@ repos: aiida/manage/database/delete/nodes.py| aiida/orm/nodes/node.py| aiida/orm/nodes/process/.*py| + aiida/repository/.*py| aiida/tools/graph/graph_traversers.py| aiida/tools/groups/paths.py| aiida/tools/importexport/archive/.*py| diff --git a/aiida/backends/general/migrations/utils.py b/aiida/backends/general/migrations/utils.py index 13f4468326..c59904078e 100644 --- a/aiida/backends/general/migrations/utils.py +++ b/aiida/backends/general/migrations/utils.py @@ -108,7 +108,7 @@ def is_initialised(self) -> bool: def erase(self): raise NotImplementedError() - def put_object_from_filelike(self, handle: io.BufferedIOBase) -> str: + def _put_object_from_filelike(self, handle: io.BufferedIOBase) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. diff --git a/aiida/orm/nodes/repository.py b/aiida/orm/nodes/repository.py index ee8f8d35be..f215cfc845 100644 --- a/aiida/orm/nodes/repository.py +++ b/aiida/orm/nodes/repository.py @@ -105,7 +105,7 @@ def list_object_names(self, path: str = None) -> typing.List[str]: return self._repository.list_object_names(path) @contextlib.contextmanager - def open(self, path: str, mode='r') -> io.BufferedReader: + def open(self, path: str, mode='r') -> typing.Iterator[typing.BinaryIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method diff --git a/aiida/repository/__init__.py b/aiida/repository/__init__.py index 2f8ec80902..6c71f4dbaa 100644 --- a/aiida/repository/__init__.py +++ b/aiida/repository/__init__.py @@ -13,4 +13,4 @@ from .common import * from .repository import * -__all__ = (backend.__all__ + common.__all__ + repository.__all__) +__all__ = (backend.__all__ + common.__all__ + repository.__all__) # type: ignore[name-defined] diff --git a/aiida/repository/backend/__init__.py b/aiida/repository/backend/__init__.py index b20d6ca9fc..20a704f865 100644 --- a/aiida/repository/backend/__init__.py +++ b/aiida/repository/backend/__init__.py @@ -5,4 +5,4 @@ from .disk_object_store import * from .sandbox import * -__all__ = (abstract.__all__ + disk_object_store.__all__ + sandbox.__all__) +__all__ = (abstract.__all__ + disk_object_store.__all__ + sandbox.__all__) # type: ignore[name-defined] diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py index 1ad5caa680..558c9d4dc6 100644 --- a/aiida/repository/backend/abstract.py +++ b/aiida/repository/backend/abstract.py @@ -44,7 +44,7 @@ def is_initialised(self) -> bool: """Return whether the repository has been initialised.""" @abc.abstractmethod - def erase(self): + def erase(self) -> None: """Delete the repository itself and all its contents. .. note:: This should not merely delete the contents of the repository but any resources it created. For @@ -53,17 +53,23 @@ def erase(self): """ @staticmethod - def is_readable_byte_stream(handle): + def is_readable_byte_stream(handle) -> bool: return hasattr(handle, 'read') and hasattr(handle, 'mode') and 'b' in handle.mode - def put_object_from_filelike(self, handle: io.BufferedIOBase) -> str: + def put_object_from_filelike(self, handle: typing.BinaryIO) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. :return: the generated fully qualified identifier for the object within the repository. + :raises TypeError: if the handle is not a byte stream. """ if not isinstance(handle, io.BytesIO) and not self.is_readable_byte_stream(handle): raise TypeError(f'handle does not seem to be a byte stream: {type(handle)}.') + return self._put_object_from_filelike(handle) + + @abc.abstractmethod + def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + pass def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str: """Store a new object with contents of the file located at `filepath` on this file system. @@ -84,7 +90,7 @@ def has_object(self, key: str) -> bool: """ @contextlib.contextmanager - def open(self, key: str) -> io.BufferedIOBase: + def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: # type: ignore[return] """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 06086c8e44..49902cba01 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- """Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend.""" import contextlib -import io import shutil import typing @@ -23,7 +22,9 @@ def __init__(self, container): def __str__(self) -> str: """Return the string representation of this repository.""" - return f'DiskObjectStoreRepository: {self.container.container_id} | {self.container.get_folder()}' + if self.is_initialised: + return f'DiskObjectStoreRepository: {self.container.container_id} | {self.container.get_folder()}' + return 'DiskObjectStoreRepository: ' @property def uuid(self) -> typing.Optional[str]: @@ -45,7 +46,7 @@ def is_initialised(self) -> bool: return self.container.is_initialised @property - def container(self): + def container(self) -> Container: return self._container def erase(self): @@ -55,14 +56,13 @@ def erase(self): except FileNotFoundError: pass - def put_object_from_filelike(self, handle: io.BufferedIOBase) -> str: + def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. :return: the generated fully qualified identifier for the object within the repository. :raises TypeError: if the handle is not a byte stream. """ - super().put_object_from_filelike(handle) return self.container.add_object(handle.read()) def has_object(self, key: str) -> bool: @@ -74,7 +74,7 @@ def has_object(self, key: str) -> bool: return self.container.has_object(key) @contextlib.contextmanager - def open(self, key: str) -> io.BufferedIOBase: + def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method diff --git a/aiida/repository/backend/sandbox.py b/aiida/repository/backend/sandbox.py index 0bb7cc6433..f4577f4783 100644 --- a/aiida/repository/backend/sandbox.py +++ b/aiida/repository/backend/sandbox.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- """Implementation of the ``AbstractRepositoryBackend`` using a sandbox folder on disk as the backend.""" import contextlib -import io import os import shutil import typing @@ -16,11 +15,14 @@ class SandboxRepositoryBackend(AbstractRepositoryBackend): """Implementation of the ``AbstractRepositoryBackend`` using a sandbox folder on disk as the backend.""" def __init__(self): - self._sandbox = None + from aiida.common.folders import SandboxFolder + self._sandbox: typing.Optional[SandboxFolder] = None def __str__(self) -> str: """Return the string representation of this repository.""" - return f'SandboxRepository: {self._sandbox.abspath}' + if self.is_initialised: + return f'SandboxRepository: {self._sandbox.abspath if self._sandbox else "null"}' + return 'SandboxRepository: ' def __del__(self): """Delete the entire sandbox folder if it was instantiated and still exists.""" @@ -68,15 +70,13 @@ def erase(self): finally: self._sandbox = None - def put_object_from_filelike(self, handle: io.BufferedIOBase) -> str: + def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. :return: the generated fully qualified identifier for the object within the repository. :raises TypeError: if the handle is not a byte stream. """ - super().put_object_from_filelike(handle) - key = str(uuid.uuid4()) filepath = os.path.join(self.sandbox.abspath, key) @@ -94,7 +94,7 @@ def has_object(self, key: str) -> bool: return key in os.listdir(self.sandbox.abspath) @contextlib.contextmanager - def open(self, key: str) -> io.BufferedIOBase: + def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method diff --git a/aiida/repository/repository.py b/aiida/repository/repository.py index 8a929c4f72..5f533a6298 100644 --- a/aiida/repository/repository.py +++ b/aiida/repository/repository.py @@ -1,9 +1,8 @@ # -*- coding: utf-8 -*- """Module for the implementation of a file repository.""" import contextlib -import io import pathlib -import typing +from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union from aiida.common.hashing import make_hash from aiida.common.lang import type_check @@ -13,7 +12,7 @@ __all__ = ('Repository',) -FilePath = typing.Union[str, pathlib.Path] +FilePath = Union[str, pathlib.Path] class Repository: @@ -32,7 +31,6 @@ class keeps a reference of the virtual file hierarchy. This means that through t # pylint: disable=too-many-public-methods - _backend = None _file_cls = File def __init__(self, backend: AbstractRepositoryBackend = None): @@ -52,11 +50,11 @@ def __str__(self) -> str: return f'Repository<{str(self.backend)}>' @property - def uuid(self) -> typing.Optional[str]: + def uuid(self) -> Optional[str]: """Return the unique identifier of the repository or ``None`` if it doesn't have one.""" return self.backend.uuid - def initialise(self, **kwargs) -> None: + def initialise(self, **kwargs: Any) -> None: """Initialise the repository if it hasn't already been initialised. :param kwargs: keyword argument that will be passed to the ``initialise`` call of the backend. @@ -69,7 +67,7 @@ def is_initialised(self) -> bool: return self.backend.is_initialised @classmethod - def from_serialized(cls, backend: AbstractRepositoryBackend, serialized: typing.Dict) -> 'Repository': + def from_serialized(cls, backend: AbstractRepositoryBackend, serialized: Dict[str, Any]) -> 'Repository': """Construct an instance where the metadata is initialized from the serialized content. :param backend: instance of repository backend to use to actually store the file objects. @@ -83,10 +81,10 @@ def from_serialized(cls, backend: AbstractRepositoryBackend, serialized: typing. return instance - def reset(self): + def reset(self) -> None: self._directory = self._file_cls() - def serialize(self) -> typing.Dict: + def serialize(self) -> Dict[str, Any]: """Serialize the metadata into a JSON-serializable format. :return: dictionary with the content metadata. @@ -100,17 +98,18 @@ def hash(self) -> str: :return: the hash representing the contents of the repository. """ - objects = {} + objects: Dict[str, Any] = {} for root, dirnames, filenames in self.walk(): objects['__dirnames__'] = dirnames for filename in filenames: key = self.get_file(root / filename).key + assert key is not None, 'Expected FileType.File to have a key' objects[str(root / filename)] = self.backend.get_object_hash(key) return make_hash(objects) @staticmethod - def _pre_process_path(path: FilePath = None) -> typing.Union[pathlib.Path, None]: + def _pre_process_path(path: FilePath = None) -> pathlib.Path: """Validate and convert the path to instance of ``pathlib.Path``. This should be called by every method of this class before doing anything, such that it can safely assume that @@ -141,7 +140,7 @@ def backend(self) -> AbstractRepositoryBackend: """ return self._backend - def set_backend(self, backend: AbstractRepositoryBackend): + def set_backend(self, backend: AbstractRepositoryBackend) -> None: """Set the backend for this repository. :param backend: the repository backend. @@ -150,7 +149,7 @@ def set_backend(self, backend: AbstractRepositoryBackend): type_check(backend, AbstractRepositoryBackend) self._backend = backend - def _insert_file(self, path: pathlib.Path, key: str): + def _insert_file(self, path: pathlib.Path, key: str) -> None: """Insert a new file object in the object mapping. .. note:: this assumes the path is a valid relative path, so should be checked by the caller. @@ -158,11 +157,7 @@ def _insert_file(self, path: pathlib.Path, key: str): :param path: the relative path where to store the object in the repository. :param key: fully qualified identifier for the object within the repository. """ - if path.parent: - directory = self.create_directory(path.parent) - else: - directory = self.get_directory - + directory = self.create_directory(path.parent) directory.objects[path.name] = self._file_cls(path.name, FileType.FILE, key) def create_directory(self, path: FilePath) -> File: @@ -186,12 +181,12 @@ def create_directory(self, path: FilePath) -> File: return directory - def get_hash_keys(self) -> typing.List[str]: + def get_hash_keys(self) -> List[str]: """Return the hash keys of all file objects contained within this repository. :return: list of file object hash keys. """ - hash_keys = [] + hash_keys: List[str] = [] def add_hash_keys(keys, objects): """Recursively add keys of all file objects to the keys list.""" @@ -264,7 +259,7 @@ def get_file(self, path: FilePath) -> File: return file_object - def list_objects(self, path: FilePath = None) -> typing.List[File]: + def list_objects(self, path: FilePath = None) -> List[File]: """Return a list of the objects contained in this repository sorted by name, optionally in given sub directory. :param path: the relative path of the directory. @@ -276,7 +271,7 @@ def list_objects(self, path: FilePath = None) -> typing.List[File]: directory = self.get_directory(path) return sorted(directory.objects.values(), key=lambda obj: obj.name) - def list_object_names(self, path: FilePath = None) -> typing.List[str]: + def list_object_names(self, path: FilePath = None) -> List[str]: """Return a sorted list of the object names contained in this repository, optionally in the given sub directory. :param path: the relative path of the directory. @@ -287,7 +282,7 @@ def list_object_names(self, path: FilePath = None) -> typing.List[str]: """ return [entry.name for entry in self.list_objects(path)] - def put_object_from_filelike(self, handle: io.BufferedReader, path: FilePath): + def put_object_from_filelike(self, handle: BinaryIO, path: FilePath) -> None: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. @@ -298,7 +293,7 @@ def put_object_from_filelike(self, handle: io.BufferedReader, path: FilePath): key = self.backend.put_object_from_filelike(handle) self._insert_file(path, key) - def put_object_from_file(self, filepath: FilePath, path: FilePath): + def put_object_from_file(self, filepath: FilePath, path: FilePath) -> None: """Store a new object under `path` with contents of the file located at `filepath` on the local file system. :param filepath: absolute path of file whose contents to copy to the repository @@ -308,7 +303,7 @@ def put_object_from_file(self, filepath: FilePath, path: FilePath): with open(filepath, 'rb') as handle: self.put_object_from_filelike(handle, path) - def put_object_from_tree(self, filepath: FilePath, path: FilePath = None): + def put_object_from_tree(self, filepath: FilePath, path: FilePath = None) -> None: """Store the entire contents of `filepath` on the local file system in the repository with under given `path`. :param filepath: absolute path of the directory whose contents to copy to the repository. @@ -333,9 +328,9 @@ def put_object_from_tree(self, filepath: FilePath, path: FilePath = None): if path.parts: self.create_directory(path) - for root, dirnames, filenames in os.walk(filepath): + for root_str, dirnames, filenames in os.walk(filepath): - root = pathlib.Path(root) + root = pathlib.Path(root_str) for dirname in dirnames: self.create_directory(path / root.relative_to(filepath) / dirname) @@ -365,7 +360,7 @@ def has_object(self, path: FilePath) -> bool: return True @contextlib.contextmanager - def open(self, path: FilePath) -> io.BufferedReader: + def open(self, path: FilePath) -> Iterator[BinaryIO]: """Open a file handle to an object stored under the given path. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method @@ -378,7 +373,9 @@ def open(self, path: FilePath) -> io.BufferedReader: :raises IsADirectoryError: if the object is a directory and not a file. :raises OSError: if the file could not be opened. """ - with self.backend.open(self.get_file(path).key) as handle: + key = self.get_file(path).key + assert key is not None, 'Expected FileType.File to have a key' + with self.backend.open(key) as handle: yield handle def get_object_content(self, path: FilePath) -> bytes: @@ -390,9 +387,11 @@ def get_object_content(self, path: FilePath) -> bytes: :raises IsADirectoryError: if the object is a directory and not a file. :raises OSError: if the file could not be opened. """ - return self.backend.get_object_content(self.get_file(path).key) + key = self.get_file(path).key + assert key is not None, 'Expected FileType.File to have a key' + return self.backend.get_object_content(key) - def delete_object(self, path: FilePath, hard_delete: bool = False): + def delete_object(self, path: FilePath, hard_delete: bool = False) -> None: """Soft delete the object from the repository. .. note:: can only delete file objects, but not directories. @@ -412,12 +411,13 @@ def delete_object(self, path: FilePath, hard_delete: bool = False): raise IsADirectoryError(f'object with path `{path}` is a directory.') if hard_delete: + assert file_object.key is not None, 'Expected FileType.File to have a key' self.backend.delete_object(file_object.key) directory = self.get_directory(path.parent) directory.objects.pop(path.name) - def delete(self): + def delete(self) -> None: """Delete the repository. .. important:: This will not just delete the contents of the repository but also the repository itself and all @@ -427,7 +427,7 @@ def delete(self): self.backend.erase() self.reset() - def erase(self): + def erase(self) -> None: """Delete all objects from the repository. .. important: this intentionally does not call through to any ``erase`` method of the backend, because unlike @@ -439,7 +439,7 @@ def erase(self): self.backend.delete_object(hash_key) self.reset() - def clone(self, source: 'Repository'): + def clone(self, source: 'Repository') -> None: """Clone the contents of another repository instance.""" if not isinstance(source, Repository): raise TypeError('source is not an instance of `Repository`.') @@ -451,7 +451,7 @@ def clone(self, source: 'Repository'): with source.open(root / filename) as handle: self.put_object_from_filelike(handle, root / filename) - def walk(self, path: FilePath = None) -> typing.Tuple[pathlib.Path, typing.List[str], typing.List[str]]: + def walk(self, path: FilePath = None) -> Iterable[Tuple[pathlib.Path, List[str], List[str]]]: """Walk over the directories and files contained within this repository. .. note:: the order of the dirname and filename lists that are returned is not necessarily sorted. This is in diff --git a/tests/repository/backend/test_abstract.py b/tests/repository/backend/test_abstract.py index 72a3c6040a..3a548bd515 100644 --- a/tests/repository/backend/test_abstract.py +++ b/tests/repository/backend/test_abstract.py @@ -30,6 +30,9 @@ def erase(self): def is_initialised(self) -> bool: return True + def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + pass + @pytest.fixture(scope='function') def repository(): diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py index 5165c419d0..8d668fb0ce 100644 --- a/tests/repository/backend/test_disk_object_store.py +++ b/tests/repository/backend/test_disk_object_store.py @@ -21,6 +21,13 @@ def repository(tmp_path): yield DiskObjectStoreRepositoryBackend(container=container) +def test_str(repository): + """Test the ``__str__`` method.""" + assert str(repository) + repository.initialise() + assert str(repository) + + def test_uuid(repository): """Test the ``uuid`` property.""" assert repository.uuid is None diff --git a/tests/repository/backend/test_sandbox.py b/tests/repository/backend/test_sandbox.py index d65a65ae68..3ef0694c5b 100644 --- a/tests/repository/backend/test_sandbox.py +++ b/tests/repository/backend/test_sandbox.py @@ -15,6 +15,13 @@ def repository(): yield SandboxRepositoryBackend() +def test_str(repository): + """Test the ``__str__`` method.""" + assert str(repository) + repository.initialise() + assert str(repository) + + def test_uuid(repository): """Test the ``uuid`` property.""" assert repository.uuid is None