diff --git a/.github/workflows/tox.yaml b/.github/workflows/tox.yaml index 5b0dcca..4012573 100644 --- a/.github/workflows/tox.yaml +++ b/.github/workflows/tox.yaml @@ -32,8 +32,4 @@ jobs: pipx install tox pipx install poetry - name: Run all tox tests - if: github.event_name != 'pull_request' - run: tox -- - - name: Run tox tests (fast only) - if: github.event_name == 'pull_request' - run: tox -- '-k not slow' \ No newline at end of file + run: tox -- \ No newline at end of file diff --git a/README.md b/README.md index 078df4c..8d2f84a 100644 --- a/README.md +++ b/README.md @@ -1,406 +1,59 @@ -# Poetry Demo +# DataJoint File Validator -This repository contains a demonstration for how to initialize a basic Python package. +This repository contains a Python package that validates file sets for DataJoint pipelines. -## Features +## Installation -- Runs pytests on pushes to default branch -- Deployment to PyPI when a version tag (`v.*.*`) is pushed -- Pip installable from source: `pip install -e .` - -## Tools Used in this Repository - -- Virtual environment, package management, and PyPI deployment using [`poetry`](https://python-poetry.org/) -- Unit testing using [`pytest`](https://pytest.org) and [`tox`](https://tox.wiki/en/latest/) -- Automated testing and PyPI deployments using GitHub Actions CI/CD - -## Tutorial - -Initialize git and poetry interactively -```console -$ git init -Initialized empty Git repository in /home/eho/ripl/repos/poetry-demo/.git/ -$ git checkout -b main -$ git branch -d master -$ poetry init - -This command will guide you through creating your pyproject.toml config. - -Package name [poetry-demo]: -Version [0.1.0]: -Description []: Example PyPI package deployment using poetry -Author [Ethan Ho , n to skip]: -License []: MIT -Compatible Python versions [^3.8]: - -Would you like to define your main dependencies interactively? (yes/no) [yes] no -Would you like to define your development dependencies interactively? 
(yes/no) [yes] yes -You can specify a package in the following forms: - - A single name (requests) - - A name and a constraint (requests@^2.23.0) - - A git url (git+https://github.com/python-poetry/poetry.git) - - A git url with a revision (git+https://github.com/python-poetry/poetry.git#develop) - - A file path (../my-package/my-package.whl) - - A directory (../my-package/) - - A url (https://example.com/packages/my-package-0.1.0.tar.gz) - -Search for package to add (or leave blank to continue): pytest -Found 20 packages matching pytest - -Enter package # to add, or the complete package name if it is not listed: - [0] pytest - [1] pytest123 - [2] 131228_pytest_1 - [3] pytest-black - [4] pytest-libnotify - [5] pytest-automation - [6] pytest-ringo - [7] pytest-integration - [8] pytest-enhancements - [9] pytest-mercurial - > 0 -Enter the version constraint to require (or leave blank to use the latest version): -Using version ^7.0.1 for pytest - -Add a package: pytest-dotenv -Found 20 packages matching pytest-dotenv - -Enter package # to add, or the complete package name if it is not listed: - [0] pytest-dotenv - [1] pytest-django-dotenv - [2] dotenv - [3] dotenv-config - [4] typed-dotenv - [5] py-dotenv - [6] dotenv-cli - [7] django-dotenv - [8] pythonsite-dotenv - [9] firstclass-dotenv - > 0 -Enter the version constraint to require (or leave blank to use the latest version): -Using version ^0.5.2 for pytest-dotenv - -Add a package: - -Generated file - -[tool.poetry] -name = "poetry-demo" -version = "0.1.0" -description = "Example PyPI package deployment using poetry" -authors = ["Ethan Ho "] -license = "MIT" - -[tool.poetry.dependencies] -python = "^3.8" - -[tool.poetry.dev-dependencies] -pytest = "^7.0.1" -pytest-dotenv = "^0.5.2" - -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" - - -Do you confirm generation? 
(yes/no) [yes] -``` - -We then install the dependencies that we defined in the above `pyproject.toml` file: - -```console -$ poetry install -Creating virtualenv poetry-demo-rPLVa0Kh-py3.8 in /home/eho/.cache/pypoetry/virtualenvs -Updating dependencies -Resolving dependencies... (5.9s) - -Writing lock file - -Package operations: 10 installs, 0 updates, 0 removals - - • Installing pyparsing (3.0.7) - • Installing attrs (21.4.0) - • Installing iniconfig (1.1.1) - • Installing packaging (21.3) - • Installing pluggy (1.0.0) - • Installing py (1.11.0) - • Installing tomli (2.0.1) - • Installing pytest (7.0.1) - • Installing python-dotenv (0.19.2) - • Installing pytest-dotenv (0.5.2) -``` - -We can now add our package source code to a subdirectory named `poetry_demo`. Note that the module directory should be snake_cased. - -``` -$ mkdir poetry_demo -$ touch poetry_demo/__init__.py poetry_demo/__main__.py -$ echo 'import pandas as pd' > poetry_demo/__init__.py -``` - -Note that we're importing `pandas` in our package. Poetry makes it easy to add package dependencies: - -```console -$ poetry add pandas -Using version ^1.4.1 for pandas - -Updating dependencies -Resolving dependencies... (17.2s) - -Writing lock file - -Package operations: 5 installs, 0 updates, 0 removals - - • Installing six (1.16.0) - • Installing numpy (1.22.2) - • Installing python-dateutil (2.8.2) - • Installing pytz (2021.3) - • Installing pandas (1.4.1) -``` - -Let's set up testing for this package. We will do this by following the canonical file structure for [pytest](https://docs.pytest.org/en/7.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules). - -```console -$ mkdir tests -$ echo 'import poetry_demo' > tests/test_import.py -``` - -We can install `pytest` as a development dependency. This means that it will be installed when a developer (or CI workflow) runs `poetry install` from the repository root, but it will not be included in the build. 
We'll also install `pytest-dotenv` so that environment vars in a `.env` file will be available in pytests. - -```console -$ poetry add --dev pytest pytest-dotenv -$ echo 'ENV_USED_IN_PYTESTS=0' > .env -``` - -Prefixing commands with `poetry run` runs them in the poetry-managed virtual environment. Let's run our test suite: - -```console -$ poetry run pytest -======================================= test session starts ======================================== -platform linux -- Python 3.8.10, pytest-7.0.1, pluggy-1.0.0 -rootdir: /home/eho/ripl/repos/poetry-demo -plugins: dotenv-0.5.2 -collected 0 items - -====================================== no tests ran in 0.36s ======================================= -``` - -Similarly, we can invoke `.py` scripts, open an interactive `ipython` session, or open an interactive shell environment (similar to `source my_virtual_env/bin/activate`): - -```console -$ poetry run python3 poetry_demo/__main__.py -$ poetry run ipython -/home/eho/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py:802: UserWarning: Attempting to work in a virtualenv. If you encounter problems, please install IPython inside the virtualenv. - warn( -Python 3.8.10 (default, Nov 26 2021, 20:14:08) -Type 'copyright', 'credits' or 'license' for more information -IPython 8.0.1 -- An enhanced Interactive Python. Type '?' for help. - -[ins] In [1]: from poetry_demo import * - -[ins] In [2]: -Do you really want to exit ([y]/n)? y -$ poetry shell -Spawning shell within /home/eho/.cache/pypoetry/virtualenvs/poetry-demo-rPLVa0Kh-py3.8 -$ deactivate -``` - -We can use poetry to build and publish the package to PyPI. 
All we need are our PyPI credentials: - -```console -$ poetry build -Building poetry-demo (0.1.0) - - Building sdist - - Built poetry-demo-0.1.0.tar.gz - - Building wheel - - Built poetry_demo-0.1.0-py3-none-any.whl -$ poetry publish - -No suitable keyring backends were found -Using a plaintext file to store and retrieve credentials -Username: ^C% -``` - -We can automate PyPI deployment using GitHub Actions continuous integration and deployment (CI/CD). The deployment will trigger when we push git tags that match the glob query `v.*.*`. -```console -$ mkdir -p .github/workflows -$ touch .github/workflows/pypi.yaml -$ code .github/workflows/pypi.yaml -$ cat .github/workflows/pypi.yaml -name: PyPI - -on: - push: - tags: - # run whenever a version tag is pushed, e.g. v1.1.0 - - "v*.*.*" - paths-ignore: - # don't run when docs are pushed - - '**.md' - - 'docs/**' - - 'docsrc/**' - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Publish GH release - uses: softprops/action-gh-release@v1 - - name: Build using poetry and publish to PyPi - uses: JRubics/poetry-publish@v1.8 - with: - pypi_token: ${{ secrets.PYPI_TOKEN }}% +```bash +pip install datajoint_file_validator@git+https://github.com/ethho/datajoint-file-validator.git ``` -Note that we would need to enter a valid [PyPI API token](https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/?highlight=access%20token#saving-credentials-on-github) in GitHub secrets under the name `PYPI_TOKEN`. - -Also note that as of writing, GitHub Actions offers unlimited Actions executions free of charge for public repositories. - -Let's do some more work on our testing environment. [`tox`](https://tox.wiki/en/latest/) allows us to test our package against multiple Python versions, and it integrates with `poetry` and GitHub Actions. 
All we need is to install `tox` (`pip install tox`) and write a `tox.ini` file at the repository root: -```console -$ touch tox.ini -$ code tox.ini -$ cat tox.ini -[tox] -isolated_build = true -envlist = py38 - -[testenv] -allowlist_externals = - poetry -commands = - poetry install - ; Check that the package is importable - poetry run python -c 'import poetry_demo' - poetry run pytest % -$ tox -.package create: /home/eho/ripl/repos/poetry-demo/.tox/.package -.package installdeps: poetry-core>=1.0.0 -py38 create: /home/eho/ripl/repos/poetry-demo/.tox/py38 -py38 inst: /home/eho/ripl/repos/poetry-demo/.tox/.tmp/package/1/poetry-demo-0.1.0.tar.gz -py38 installed: numpy==1.22.2,pandas==1.4.1,poetry-demo @ file:///home/eho/ripl/repos/poetry-demo/.tox/.tmp/package/1/poetry-demo-0.1.0.tar.gz,python-dateutil==2.8.2,pytz==2021.3,six==1.16.0 -py38 run-test-pre: PYTHONHASHSEED='618313639' -py38 run-test: commands[0] | poetry install -Installing dependencies from lock file +## Quick Start -Package operations: 10 installs, 0 updates, 0 removals +Validate a fileset against an existing manifest: - • Installing pyparsing (3.0.7) - • Installing attrs (21.4.0) - • Installing iniconfig (1.1.1) - • Installing packaging (21.3) - • Installing pluggy (1.0.0) - • Installing py (1.11.0) - • Installing tomli (2.0.1) - • Installing pytest (7.0.1) - • Installing python-dotenv (0.19.2) - • Installing pytest-dotenv (0.5.2) +```python +from datajoint_file_validator import validate -Installing the current project: poetry-demo (0.1.0) -py38 run-test: commands[1] | poetry run python -c 'import poetry_demo' -py38 run-test: commands[2] | poetry run pytest -========================================= test session starts ========================================= -platform linux -- Python 3.8.10, pytest-7.0.1, pluggy-1.0.0 -cachedir: .tox/py38/.pytest_cache -rootdir: /home/eho/ripl/repos/poetry-demo -plugins: dotenv-0.5.2 -collected 0 items +my_dataset_path = 'tests/data/filesets/fileset0' +manifest_path = 
'datajoint_file_validator/manifests/demo_dlc_v0.1.yaml' +success, report = validate(my_dataset_path, manifest_path, verbose=True, format='plain') +# Validation failed with the following errors: +# [ +# { +# 'rule': 'Min total files', +# 'rule_description': 'Check that there are at least 6 files anywhere in the fileset', +# 'constraint_id': 'count_min', +# 'constraint_value': 6, +# 'errors': 'constraint `count_min` failed: 4 < 6' +# } +# ] -======================================== no tests ran in 0.29s ======================================== -ERROR: InvocationError for command /home/eho/.poetry/bin/poetry run pytest (exited with code 5) -_______________________________________________ summary _______________________________________________ -ERROR: py38: commands failed +print(success) +# False ``` -Tox reports failure because we don't have any real pytests (functions named like `test_*`) in the directory `./tests`. -We can set up a GitHub Actions workflow that automatically runs tox on pushes to pull requests or the `main` branch: +Alternatively, validate using the included command line interface: ```console -$ touch .github/workflows/tox.yaml -$ code .github/workflows/tox.yaml -$ cat .github/workflows/tox.yaml -name: Tox - -on: - push: - branches: - - main - paths-ignore: - - '**.md' - - 'docs/**' - - 'docsrc/**' - - pull_request: - branches: - - main - paths-ignore: - - '**.md' - - 'docs/**' - - 'docsrc/**' - -jobs: - test: - name: Run unit tests in tox - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: '3.7' - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - name: Install Python dependencies - run: python -m pip install -q poetry tox - - name: Run all tox tests - if: github.event_name != 'pull_request' - run: tox -- - - name: Run 
tox tests (fast only) +$ datajoint-file-validator validate tests/data/filesets/fileset0 datajoint_file_validator/manifests/demo_dlc_v0.1.yaml +❌ Validation failed with 1 errors! +┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ +┃ ┃ Rule ┃ ┃ Constraint ┃ ┃ +┃ Rule ID ┃ Description ┃ Constraint ID ┃ Value ┃ Errors ┃ +┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ +│ Min total │ Check that │ count_min │ 6 │ constraint │ +│ files │ there are at │ │ │ `count_min` │ +│ │ least 6 files │ │ │ failed: 4 < 6 │ +│ │ anywhere in │ │ │ │ +│ │ the fileset │ │ │ │ +└────────────────┴────────────────┴───────────────┴───────────────┴────────────────┘ ``` -We'll add a standard Python `.gitignore` file: +## Author -```console -$ wget https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore ---2022-02-28 15:00:34-- https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore -Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ... -Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: 2762 (2.7K) [text/plain] -Saving to: ‘Python.gitignore’ - -Python.gitignore 100%[==================================>] 2.70K --.-KB/s in 0.001s +Ethan Ho @ethho -2022-02-28 15:00:35 (2.12 MB/s) - ‘Python.gitignore’ saved [2762/2762] -FINISHED --2022-02-28 15:00:35-- -Total wall clock time: 0.1s -Downloaded: 1 files, 2.7K in 0.001s (2.12 MB/s) -$ mv Python.gitignore .gitignore -``` +## License -Finally, we create our first git commit and push our changes to a new GitHub repository using the [GitHub CLI](https://cli.github.com/). - -```console -$ git add . -$ git commit -m 'Initial commit' -❯ gh repo create -? What would you like to do? Push an existing local repository to GitHub -? Path to local repository . -? 
Repository name poetry-demo -? Description Example Python package using poetry, tox, and GitHub Actions -? Visibility Public -✓ Created repository ethho/poetry-demo on GitHub -? Add a remote? Yes -? What should the new remote be called? origin -✓ Added remote git@github.com:ethho/poetry-demo.git -? Would you like to push commits from the current branch to the "origin"? Yes -✓ Pushed commits to git@github.com:ethho/poetry-demo.git -$ gh repo view --web -``` \ No newline at end of file +[MIT](LICENSE) \ No newline at end of file diff --git a/datajoint_file_validator/__init__.py b/datajoint_file_validator/__init__.py index 371a0f3..94731ed 100644 --- a/datajoint_file_validator/__init__.py +++ b/datajoint_file_validator/__init__.py @@ -1,3 +1,5 @@ -from . import ( - snapshot, validate, manifest, result -) +from . import snapshot, main, manifest, result +from .snapshot import Snapshot +from .manifest import Manifest +from .result import ValidationResult +from .main import validate_snapshot, validate diff --git a/datajoint_file_validator/cli.py b/datajoint_file_validator/cli.py index 8945bd5..e85f4e1 100644 --- a/datajoint_file_validator/cli.py +++ b/datajoint_file_validator/cli.py @@ -1,8 +1,11 @@ import typer +from enum import Enum from typing_extensions import Annotated +import yaml from rich import print as rprint from rich.console import Console from rich.table import Table +from . import main console = Console() app = typer.Typer() @@ -15,7 +18,6 @@ def callback(): """ -@app.command() def show_table(): table = Table("Name", "Item") table.add_row("Rick", "Portal Gun") @@ -23,7 +25,6 @@ def show_table(): console.print(table) -@app.command() def open_file(path: str): """ Open a file at PATH in the default app. @@ -32,7 +33,6 @@ def open_file(path: str): typer.launch(path, locate=True) -@app.command() def read_file(path: Annotated[typer.FileText, typer.Option()]): """ Reads lines from a file at PATH. 
@app.command()
def validate(
    target: Annotated[str, typer.Argument(..., exists=True)],
    manifest: Annotated[str, typer.Argument(..., exists=True)],
    raise_err: bool = False,
    format: DisplayFormat = DisplayFormat.table,
):
    """
    Validate a target against a manifest.
    """
    # The library call runs with its own printing switched off; this command
    # renders the report itself in the requested display format.
    passed, errors = main.validate(
        target, manifest, verbose=False, raise_err=raise_err
    )
    if passed:
        rprint(":heavy_check_mark: Validation successful!")
        return

    rprint(f":x: Validation failed with {len(errors)} errors!")
    if format == DisplayFormat.table:
        Console().print(main.table_from_report(errors))
    elif format == DisplayFormat.yaml:
        # Blank line separates the summary from the YAML dump.
        rprint()
        rprint(yaml.dump(errors))
    elif format == DisplayFormat.plain:
        rprint(errors)

    # Reaching this point means validation failed: exit with a non-zero code.
    raise typer.Exit(code=1)
@dataclass(frozen=True)
class SchemaConvertibleConstraint(Constraint):
    """
    Constraint that is expressed as a Cerberus schema and checked against
    every file in a snapshot individually.

    Subclasses implement `to_schema()`; `validate()` is provided here.
    """

    def to_schema(self) -> Schema:
        """
        Convert this constraint to a Cerberus schema that each file in
        the Snapshot will be validated against.
        """
        raise NotImplementedError(
            "Subclass of SchemaConvertibleConstraint must implement to_schema() method."
        )

    @staticmethod
    def _validate_file(schema: Schema, file: dict) -> Validator:
        """
        Validate a single file entry against `schema`.

        Returns the Validator that was used, so the caller can read its
        `errors`. Keys not mentioned in the schema are allowed.
        """
        v = Validator(allow_unknown=True)
        v.validate(file, schema)
        return v

    def validate(self, snapshot: Snapshot) -> ValidationResult:
        """
        Validate a snapshot against a single constraint.

        The result's status is True only when no file produced validation
        errors. On failure, the message maps the path of each failing file
        to that file's Cerberus errors.
        """
        schema: Schema = self.to_schema()

        # A Validator instance is always truthy, so pass/fail must be read
        # from `validator.errors` (empty when the file satisfied the schema),
        # not from the truthiness of the Validator object itself.
        failures = {}
        for file in snapshot:
            validator = self._validate_file(schema, file)
            if validator.errors:
                # `.get` so that an entry without a "path" key is still
                # reported instead of raising KeyError while building the
                # message.
                failures[file.get("path")] = validator.errors

        status = not failures
        return ValidationResult(
            status=status,
            message=None if status else failures,
            context=dict(snapshot=snapshot, constraint=self),
        )
class DJFileValidatorError(Exception):
    """Error raised by the file validator, e.g. when validation fails."""
def validate(
    target: Union[Snapshot, PathLike],
    manifest: Union[PathLike, Manifest],
    verbose=False,
    raise_err=False,
    format="table",
) -> Tuple[bool, ErrorReport]:
    """
    Validate a target against a manifest.

    Parameters
    ----------
    target : PathLike | Snapshot
        A path to a file or directory, or an instance of a Snapshot object.
        A path is converted to a snapshot with `create_snapshot`; any other
        value is passed through unchanged.
    manifest : PathLike | Manifest
        Path to a manifest file, or an instance of a Manifest object.
    verbose : bool
        Print verbose output.
    raise_err : bool
        Raise an error if validation fails.
    format : str
        Display format forwarded to `validate_snapshot` for verbose output.

    Returns
    -------
    success : bool
        Whether validation passed, as reported by `validate_snapshot`.
    error_report : list of dict
        The error report produced by `validate_snapshot`.

    Raises
    ------
    ValueError
        If `manifest` is neither a path nor a Manifest object.
    """
    # Local import keeps this function self-contained.
    import os

    # Accept both plain strings and os.PathLike objects (e.g. pathlib.Path).
    # Paths are normalised with os.fspath so downstream helpers receive the
    # same kind of value they received when only `str` was accepted.
    path_types = (str, os.PathLike)

    # Infer how to fetch manifest
    if isinstance(manifest, Manifest):
        mani = manifest
    elif isinstance(manifest, path_types):
        mani = Manifest.from_yaml(os.fspath(manifest))
    else:
        raise ValueError("manifest must be a path or Manifest object.")

    # Infer how to create snapshot
    if isinstance(target, path_types):
        target = create_snapshot(os.fspath(target))

    return validate_snapshot(
        target, mani, verbose=verbose, raise_err=raise_err, format=format
    )
def validate_snapshot(
    snapshot: Snapshot,
    manifest: Manifest,
    verbose=False,
    raise_err=False,
    format="table",
) -> Tuple[bool, ErrorReport]:
    """
    Validate a snapshot against a manifest.

    Parameters
    ----------
    snapshot : Snapshot
        A snapshot dictionary.
    manifest : Manifest
        The manifest whose rules are applied to the snapshot.
    verbose : bool
        Print verbose output (the error report) when validation fails.
    raise_err : bool
        Raise an error if validation fails.
    format : str
        How to print the report when `verbose` is set and validation fails:
        "table", "yaml" or "plain".

    Returns
    -------
    success : bool
        True when every constraint result of every rule is truthy.
    error_report : list of dict
        One entry per failed constraint, with the keys "rule",
        "rule_description", "constraint_id", "constraint_value" and "errors".

    Raises
    ------
    ValueError
        If `format` is unsupported (only checked when a report is printed).
    DJFileValidatorError
        If `raise_err` is set and validation fails.
    """
    # One dict per rule, mapping constraint id -> ValidationResult.
    results: List[Dict[str, ValidationResult]] = [
        rule.validate(snapshot) for rule in manifest.rules
    ]
    # NOTE(review): relies on the truthiness of ValidationResult, which is
    # defined outside this function - confirm it reflects `.status`.
    success = all(all(result.values()) for result in results)

    # Generate error report
    error_report = []
    for rule, result in zip(manifest.rules, results):
        for constraint, valresult in result.items():
            if valresult.status:
                continue
            error_report.append(
                {
                    "rule": rule.id,
                    "rule_description": rule.description,
                    "constraint_id": constraint,
                    "constraint_value": valresult.context["constraint"].val,
                    "errors": valresult.message,
                }
            )

    if verbose and not success:
        rprint("Validation failed with the following errors:")
        if format == "table":
            table = table_from_report(error_report)
            console = Console()
            console.print(table)
        elif format == "yaml":
            rprint(yaml.dump(error_report))
        elif format == "plain":
            rprint(error_report)
        else:
            raise ValueError(f"Unsupported format: {format}")

    if raise_err and not success:
        # Imported here because this module's top-level imports do not bring
        # DJFileValidatorError into scope; without it this branch would fail
        # with NameError instead of raising the intended error.
        from .error import DJFileValidatorError

        raise DJFileValidatorError("Validation failed.")

    return success, error_report
@dataclass
class Manifest:
    """
    Manifest for a fileset, defining a fileset type.
    This class is responsible for parsing a manifest file, validating its
    syntax, and converting into a query and a set of rules.
    """

    # Required manifest fields, read from the "id", "version" and
    # "description" keys of the manifest dictionary.
    id: str
    version: str
    description: str
    # Rules built from the entries of the manifest's "rules" list.
    rules: List[Rule] = field(default_factory=list)
    # Optional "uri" entry of the manifest; None when absent.
    uri: Optional[str] = None

    @staticmethod
    def check_valid(d: Dict) -> bool:
        """
        Use Cerberus to check if manifest has valid syntax.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError()

    @classmethod
    def from_yaml(cls, path: PathLike, **kw) -> "Manifest":
        """
        Load a manifest from a YAML file.

        Extra keyword arguments are forwarded to `from_dict`.
        """
        return cls.from_dict(read_yaml(path), **kw)

    @classmethod
    def from_dict(cls, d: Dict, check_syntax=False) -> "Manifest":
        """
        Load a manifest from a dictionary.

        The keys "id", "version", "description" and "rules" are required
        (a missing one raises KeyError); "uri" is optional. When
        `check_syntax` is set, the dictionary is first passed to
        `check_valid`, and the flag is also forwarded to `Rule.from_dict`.

        Raises
        ------
        DJFileValidatorError
            If `check_syntax` is set and `check_valid` reports the manifest
            as invalid.
        """
        # Explicit raise instead of `assert`, so the check is not stripped
        # when Python runs with optimisations enabled (-O).
        if check_syntax and not cls.check_valid(d):
            raise DJFileValidatorError("Manifest has invalid syntax.")
        return cls(
            # TODO: hash by default
            id=d["id"],
            uri=d.get("uri"),
            version=d["version"],
            description=d["description"],
            rules=[
                Rule.from_dict(rule, check_syntax=check_syntax) for rule in d["rules"]
            ],
        )
@@ -18,16 +18,16 @@ rules: path: "/*" type: directory rules: - - name: count_per_subdir + - id: count_per_subdir # Here, the query ("/**") is relative to the sub-directory # instead of the root. query: "/" count_min: 1 count_max: 100 - - name: check_file_type + - id: check_file_type # query: "/**" is implicit here regex: "^.*\\.mp4$" - - name: file_type_in_subdir + - id: file_type_in_subdir description: Check that each subdirectory has exactly one obs.md file query: # Instead of path, regex, or type, we can write a custom query function diff --git a/datajoint_file_validator/manifests/demo_dlc_v0.1.yaml b/datajoint_file_validator/manifests/demo_dlc_v0.1.yaml index bf6c258..a95b519 100644 --- a/datajoint_file_validator/manifests/demo_dlc_v0.1.yaml +++ b/datajoint_file_validator/manifests/demo_dlc_v0.1.yaml @@ -1,33 +1,30 @@ -id: https://raw.githubusercontent.com/datajoint/datajoint-file-validator/main/manifests/demo_dlc_v0.1 -name: demo_dlc +uri: https://raw.githubusercontent.com/datajoint/datajoint-file-validator/main/manifests/demo_dlc_v0.1 +id: demo_dlc version: 0.1.0 description: DeepLabCut demo fileset type author: - name: Ethan Ho + id: Ethan Ho email: ethan@datajoint.com rules: - - name: "Min total files" + - id: "Min total files" description: "Check that there are at least 6 files anywhere in the fileset" # This is the default query, which returns every file, even if it is in # a subdirectory. - query: "/**" + query: "**" count_min: 6 - - name: "Count .mp4 files" - description: "Check that there are at least 4 .mp4 files and their total size is at least 2.0K" - query: "/**/*.mp4" - # This rule has two constraints, which both must be satisfied for the rule - # to pass. 
- count_min: 4 - total_size_min: 2.0K + - id: "Count .mp4 files" + description: "Check that there are at least 2 .mp4 files" + query: "**/*.mp4" + count_min: 2 # More complex rules below - - name: "Only .mp4 and .csv at top level" + - id: "Only .mp4 and .csv at top level" # This query returns only files at the top level, and will not consider # files in subdirectories. - query: "/*" + query: "*" # Note that this regex will not match subdirectories. regex: ".*\\.mp4$|.*\\.csv$" - - name: "Every video file must have a corresponding csv file" + - id: "Every video file must have a corresponding csv file" description: "Check that every video file has a corresponding csv file" # query: "/**" is implicit here # Instead of other constraints, we can use eval to write a custom constraint function. @@ -36,7 +33,3 @@ rules: n_mp4 = len([r for r in results if r['extension'] == 'mp4']) n_csv = len([r for r in results if r['extension'] == 'csv']) return n_mp4 == n_csv - - name: "Size constraints" - description: "Check that the size of all files are within a certain range" - total_size_min: 2.0K - total_size_max: 10000 diff --git a/datajoint_file_validator/path_utils.py b/datajoint_file_validator/path_utils.py new file mode 100644 index 0000000..8ca5e84 --- /dev/null +++ b/datajoint_file_validator/path_utils.py @@ -0,0 +1,23 @@ +from typing import List +import os.path +from .snapshot import FileMetadata, Snapshot +from wcmatch import glob + +GLOB_FLAGS = glob.GLOBSTAR | glob.MARK | glob.FOLLOW + + +def find_matching_paths(filenames, patterns, flags=GLOB_FLAGS, **kw): + return glob.globfilter(filenames, patterns, flags=flags, **kw) + + +def find_matching_files_gen(snapshot: Snapshot, patterns): + filenames = [file.get("path") for file in snapshot] + return ( + file + for file in snapshot + if file.get("path") in set(find_matching_paths(filenames, patterns)) + ) + + +def find_matching_files(snapshot: Snapshot, patterns): + return list(find_matching_files_gen(snapshot, patterns)) diff 
--git a/datajoint_file_validator/query.py b/datajoint_file_validator/query.py new file mode 100644 index 0000000..41dbea9 --- /dev/null +++ b/datajoint_file_validator/query.py @@ -0,0 +1,31 @@ +import os +from dataclasses import dataclass +from pathlib import PurePath +from .snapshot import Snapshot, PathLike +from .path_utils import find_matching_files + +DEFAULT_QUERY = "**" + + +@dataclass(frozen=True) +class Query: + """An object representing a query against a snapshot.""" + + def filter(self, snapshot: Snapshot) -> Snapshot: + """Filter a Snapshot based on this query. Virtual method.""" + raise NotImplementedError("Subclass of Query must implement filter() method.") + + +@dataclass(frozen=True) +class GlobQuery(Query): + """A query that filters based on path. Includes support for glob wildcards.""" + + path: str = DEFAULT_QUERY + + def filter(self, snapshot: Snapshot) -> Snapshot: + """Filter a Snapshot based on this query.""" + return list(self._filter_generator(snapshot)) + + def _filter_generator(self, snapshot: Snapshot): + """Filter a Snapshot based on this query. 
Returns a generator.""" + return find_matching_files(snapshot, self.path) diff --git a/datajoint_file_validator/result.py b/datajoint_file_validator/result.py index 686a2f1..8d94f55 100644 --- a/datajoint_file_validator/result.py +++ b/datajoint_file_validator/result.py @@ -1,13 +1,21 @@ -from dataclasses import dataclass -from typing import Dict, Any +from dataclasses import dataclass, field +from typing import Dict, Any, Optional import cerberus @dataclass class ValidationResult: status: bool - errors: Any + # TODO + message: Any + context: Optional[Dict[str, Any]] = field(default_factory=dict) @classmethod def from_validator(cls, v: cerberus.Validator): return cls(status=v.status, errors=v.errors) + + def __repr__(self): + return f"ValidationResult(status={self.status}, message={self.message})" + + def __bool__(self) -> bool: + return self.status diff --git a/datajoint_file_validator/rule.py b/datajoint_file_validator/rule.py new file mode 100644 index 0000000..5681e95 --- /dev/null +++ b/datajoint_file_validator/rule.py @@ -0,0 +1,74 @@ +from dataclasses import dataclass, field +import hashlib +from typing import Dict, List, Any, Optional +from .constraint import Constraint, CONSTRAINT_MAP +from .result import ValidationResult +from .snapshot import Snapshot, PathLike, FileMetadata +from .query import Query, GlobQuery, DEFAULT_QUERY +from .config import config +from .error import DJFileValidatorError + + +@dataclass +class Rule: + """A single rule for a fileset.""" + + id: Optional[str] + description: Optional[str] + constraints: List[Constraint] = field(default_factory=list) + query: Query = field(default_factory=GlobQuery) + + def __post_init__(self): + if not self.id: + self.id = self._generate_id() + + def _generate_id(self) -> str: + return hashlib.sha1(hex(hash(self)).encode("utf-8")).hexdigest()[:7] + + def __hash__(self): + return hash((self.query, tuple(self.constraints))) + + def validate(self, snapshot: Snapshot) -> Dict[str, ValidationResult]: + 
filtered_snapshot: Snapshot = self.query.filter(snapshot) + if self.query.path == DEFAULT_QUERY and config.debug: + assert filtered_snapshot == snapshot + results = list( + map(lambda constraint: constraint.validate(filtered_snapshot), self.constraints) + ) + return { + constraint.name: result + for constraint, result in zip(self.constraints, results) + } + + @staticmethod + def compile_query(raw: Any) -> "Query": + assert isinstance(raw, str) + return GlobQuery(path=raw) + + @staticmethod + def compile_constraint(name: str, val: Any) -> "Constraint": + if name not in CONSTRAINT_MAP: + raise DJFileValidatorError(f"Unknown constraint: {name}") + try: + return CONSTRAINT_MAP[name](val) + except DJFileValidatorError as e: + raise DJFileValidatorError(f"Error parsing constraint {name}: {e}") + + @classmethod + def from_dict(cls, d: Dict, check_syntax=False) -> "Rule": + """Load a rule from a dictionary.""" + if check_syntax: + assert cls.check_valid(d) + id = d.pop("id", None) + try: + self_ = cls( + id=id, + description=d.pop("description", None), + query=cls.compile_query(d.pop("query", DEFAULT_QUERY)), + constraints=[ + cls.compile_constraint(name, val) for name, val in d.items() + ], + ) + except DJFileValidatorError as e: + raise DJFileValidatorError(f"Error parsing rule '{id}': {e}") + return self_ diff --git a/datajoint_file_validator/snapshot.py b/datajoint_file_validator/snapshot.py index c246db0..4cc7bf3 100644 --- a/datajoint_file_validator/snapshot.py +++ b/datajoint_file_validator/snapshot.py @@ -2,7 +2,8 @@ from datetime import datetime import pytz from dataclasses import dataclass, field, asdict -from pathlib import Path +from wcmatch import pathlib +from wcmatch.pathlib import Path from typing import List, Dict, Any, Optional, Union ENABLE_PATH_HANDLE = True @@ -10,11 +11,15 @@ @dataclass class FileMetadata: - """Metadata for a file.""" + """ + Metadata for a file. 
+ """ name: str - path: str + path: str = field(init=False) abs_path: str + rel_path: str + extension: str size: int type: str last_modified: str @@ -24,8 +29,7 @@ class FileMetadata: _path: Optional[Path] = field(default=None, repr=False) def __post_init__(self): - # self.id = f'{self.phrase}_{self.word_type.name.lower()}' - pass + self.path = self.rel_path @staticmethod def to_iso_8601(time_ns: int): @@ -33,15 +37,16 @@ def to_iso_8601(time_ns: int): return time_.replace(tzinfo=pytz.UTC).isoformat() @classmethod - def from_path(cls, path: Path) -> "FileMetadata": + def from_path(cls, path: Path, root: Path) -> "FileMetadata": """Return a FileMetadata object from a Path object.""" return cls( name=path.name, - path=str(path.relative_to(path.parent)), + rel_path=str(path.relative_to(root)), abs_path=str(path), size=path.stat().st_size, type="file" if path.is_file() else "directory", last_modified=cls.to_iso_8601(path.stat().st_mtime_ns), + extension=path.suffix, mtime_ns=path.stat().st_mtime_ns, ctime_ns=path.stat().st_ctime_ns, atime_ns=path.stat().st_atime_ns, @@ -49,7 +54,7 @@ def from_path(cls, path: Path) -> "FileMetadata": ) def __repr__(self): - return f"{self.__class__.__name__}(path={self.path!r})" + return f"{self.__class__.__name__}(path={self.path!r}, type={self.type!r})" @staticmethod def dict_factory(x): @@ -66,18 +71,20 @@ def asdict(self): Snapshot = List[Dict[str, Any]] -def _snapshot_to_cls(path: str) -> List[FileMetadata]: +def _snapshot_to_cls( + path: str, flags=(pathlib.GLOBSTAR | pathlib.SPLIT | pathlib.FOLLOW) +) -> List[FileMetadata]: """Generate a snapshot of a file or directory at local `path`.""" root = Path(path) if root.is_file(): files = [FileMetadata.from_path(root)] elif root.is_dir(): - files = [FileMetadata.from_path(p) for p in root.glob("**/*")] + files = [FileMetadata.from_path(p, root) for p in root.glob("**", flags=flags)] else: raise ValueError(f"path {path} is not a file or directory") return files -def snapshot(path: str) 
-> Snapshot: +def create_snapshot(path: str) -> Snapshot: files = _snapshot_to_cls(path) return [f.asdict() for f in files] diff --git a/datajoint_file_validator/validate.py b/datajoint_file_validator/validate.py deleted file mode 100644 index 0f41885..0000000 --- a/datajoint_file_validator/validate.py +++ /dev/null @@ -1,63 +0,0 @@ -import yaml -import cerberus -from typing import Dict -from .manifest import Manifest -from .snapshot import Snapshot, snapshot, PathLike -from .result import ValidationResult - - -def validate_snapshot( - snapshot: Snapshot, manifest: Manifest, verbose=False, raise_err=False -) -> ValidationResult: - """ - Validate a snapshot against a manifest. - - Parameters - ---------- - snapshot : Snapshot - A snapshot dictionary. - manifest : Manifest - Contents of a manifest file. - verbose : bool - Print verbose output. - raise_err : bool - Raise an error if validation fails. - - Returns - ------- - result : dict - A dictionary with the validation result. - """ - # Convert manifest to Cerberus schema - raise NotImplementedError() - # Validate snapshot against schema using Cerberus - v = cerberus.Validator(schema) - v.validate(snapshot) - result = ValidationResult.from_validator(v) - return result - - -def validate_path( - path: PathLike, manifest: Manifest, verbose=False, raise_err=False -) -> ValidationResult: - """ - Validate a path against a manifest. - - Parameters - ---------- - path : PathLike - A path to a file or directory. - manifest : Manifest - Path to a manifest file. - verbose : bool - Print verbose output. - raise_err : bool - Raise an error if validation fails. - - Returns - ------- - result : ValidationResult - A dictionary with the validation result. 
- """ - snapshot = snapshot(path) - return validate(snapshot, manifest, verbose=verbose, raise_err=raise_err) diff --git a/datajoint_file_validator/yaml.py b/datajoint_file_validator/yaml.py new file mode 100644 index 0000000..256b079 --- /dev/null +++ b/datajoint_file_validator/yaml.py @@ -0,0 +1,9 @@ +import yaml +from typing import Any +from .snapshot import PathLike + + +def read_yaml(path: PathLike) -> Any: + """Read a YAML file from `path`.""" + with open(path, "r") as f: + return yaml.safe_load(f) diff --git a/poetry.lock b/poetry.lock index fdd75a3..93b7978 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,16 @@ # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +[[package]] +name = "bracex" +version = "2.4" +description = "Bash style brace expander." +optional = false +python-versions = ">=3.8" +files = [ + {file = "bracex-2.4-py3-none-any.whl", hash = "sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418"}, + {file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"}, +] + [[package]] name = "cerberus" version = "1.3.5" @@ -334,7 +345,21 @@ files = [ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "wcmatch" +version = "8.5" +description = "Wildcard/glob file name matcher." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "wcmatch-8.5-py3-none-any.whl", hash = "sha256:14554e409b142edeefab901dc68ad570b30a72a8ab9a79106c5d5e9a6d241bd5"}, + {file = "wcmatch-8.5.tar.gz", hash = "sha256:86c17572d0f75cbf3bcb1a18f3bf2f9e72b39a9c08c9b4a74e991e1882a8efb3"}, +] + +[package.dependencies] +bracex = ">=2.1.1" + [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "d2af8bcdcf8de42c40221fe5a76803f54947957e01901d8007da09577258be6f" +content-hash = "de40b7fb81d03a274ce9f388d3d19adfcfa33b7e673ec866af2b1cea07564b00" diff --git a/pyproject.toml b/pyproject.toml index 97957e3..9b50797 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ cerberus = "^1.3.5" typer = {version = "^0.9.0", extras = ["all"]} pyyaml = "^6.0.1" pytz = "^2023.3.post1" +wcmatch = "^8.5" [tool.poetry.dev-dependencies] pytest = "^7.0.1" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5871ed8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1 @@ +import pytest diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame11.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame11.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame12.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame12.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame13.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame13.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame14.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame14.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame15.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame15.png deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/filesets/fileset1/2021-10-01/subject0_frame16.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame16.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame17.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame17.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame18.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame18.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame19.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame19.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame2.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame2.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame20.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame20.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame21.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame21.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame22.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame22.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame23.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame23.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame24.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame24.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame25.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame25.png deleted file mode 100644 
index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame26.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame26.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame27.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame27.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame28.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame28.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame29.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame29.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame3.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame3.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame30.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame30.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame31.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame31.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame32.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame32.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame33.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame33.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame34.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame34.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame35.png 
b/tests/data/filesets/fileset1/2021-10-01/subject0_frame35.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame36.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame36.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame37.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame37.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame38.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame38.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame39.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame39.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame4.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame4.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame40.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame40.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame41.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame41.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame42.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame42.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame43.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame43.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame44.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame44.png deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/filesets/fileset1/2021-10-01/subject0_frame45.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame45.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame46.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame46.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame47.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame47.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame48.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame48.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame49.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame49.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame5.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame5.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame6.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame6.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame7.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame7.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame8.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame8.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame9.png b/tests/data/filesets/fileset1/2021-10-01/subject0_frame9.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame10.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame10.png deleted file mode 100644 index 
e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame11.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame11.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame12.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame12.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame6.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame6.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame7.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame7.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame8.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame8.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject1_frame9.png b/tests/data/filesets/fileset1/2021-10-01/subject1_frame9.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame0.png b/tests/data/filesets/fileset1/2021-10-02/foo/bar.txt similarity index 100% rename from tests/data/filesets/fileset1/2021-10-01/subject0_frame0.png rename to tests/data/filesets/fileset1/2021-10-02/foo/bar.txt diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame0.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame0.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame1.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame1.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame10.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame10.png deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/filesets/fileset1/2021-10-02/subject0_frame11.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame11.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame12.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame12.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame13.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame13.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame14.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame14.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame15.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame15.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame16.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame16.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame17.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame17.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame18.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame18.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame19.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame19.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame2.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame2.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame20.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame20.png deleted file mode 100644 
index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame21.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame21.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame22.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame22.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame23.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame23.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame24.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame24.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame25.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame25.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame26.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame26.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame27.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame27.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame28.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame28.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame29.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame29.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame3.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame3.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame30.png 
b/tests/data/filesets/fileset1/2021-10-02/subject0_frame30.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame31.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame31.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame32.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame32.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame33.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame33.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame34.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame34.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame35.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame35.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame36.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame36.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame37.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame37.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame38.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame38.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame39.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame39.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame4.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame4.png deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/filesets/fileset1/2021-10-02/subject0_frame40.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame40.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame41.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame41.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame42.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame42.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame43.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame43.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame44.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame44.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame45.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame45.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame46.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame46.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame47.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame47.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame48.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame48.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame49.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame49.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame5.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame5.png deleted file mode 100644 
index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame50.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame50.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame51.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame51.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame52.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame52.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame6.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame6.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame7.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame7.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame8.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame8.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject0_frame9.png b/tests/data/filesets/fileset1/2021-10-02/subject0_frame9.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame10.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame10.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame11.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame11.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame12.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame12.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame13.png 
b/tests/data/filesets/fileset1/2021-10-02/subject1_frame13.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame14.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame14.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame15.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame15.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame16.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame16.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame17.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame17.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame18.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame18.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame19.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame19.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame20.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame20.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame21.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame21.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame8.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame8.png deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/filesets/fileset1/2021-10-02/subject1_frame9.png b/tests/data/filesets/fileset1/2021-10-02/subject1_frame9.png deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/filesets/fileset1/2021-10-01/subject0_frame1.png b/tests/data/filesets/fileset1/README.txt similarity index 100% rename from tests/data/filesets/fileset1/2021-10-01/subject0_frame1.png rename to tests/data/filesets/fileset1/README.txt diff --git a/tests/data/filesets/fileset1/2021-10-01/subject0_frame10.png b/tests/data/filesets/fileset1/obs.md similarity index 100% rename from tests/data/filesets/fileset1/2021-10-01/subject0_frame10.png rename to tests/data/filesets/fileset1/obs.md diff --git a/tests/test_fileset_validate.py b/tests/test_fileset_validate.py index acbced9..597fdef 100644 --- a/tests/test_fileset_validate.py +++ b/tests/test_fileset_validate.py @@ -4,17 +4,65 @@ @pytest.mark.parametrize( - "manifest_path,fileset_path", - ( - ( - "datajoint_file_validator/manifests/demo_dlc_v0.1.demo_dlc_v0.1.yaml", - "tests/data/filesets/fileset0", - ), - ), + "manifest_path", + ("datajoint_file_validator/manifests/demo_dlc_v0.1.yaml",), ) -def test_validate_built_in_filesets(manifest_path, fileset_path): - snapshot = djfval.snapshot.snapshot(fileset_path) - result = djfval.validate.validate_snapshot( - snapshot=snapshot, manifest=manifest_path, verbose=True, raise_err=False +def test_parse_manifest_from_yaml(manifest_path): + assert isinstance(manifest_path, str) + mani = djfval.manifest.Manifest.from_yaml(manifest_path) + assert isinstance(mani, djfval.manifest.Manifest) + + +def test_validate_fileset0(): + success, report = djfval.validate( + "tests/data/filesets/fileset0", + "datajoint_file_validator/manifests/demo_dlc_v0.1.yaml", + verbose=True, + raise_err=False, + ) + failed_constraints = [item["constraint_id"] for item in report] + assert not success + assert isinstance(report, list) + assert failed_constraints == ["count_min"] + + +def test_validate_fileset1(): + manifest = djfval.Manifest.from_dict( + { + "id": "test", + "version": "0.1", + "description": "Test manifest", + "rules": [ + { + "id": "count_min_max", + "description": "Check count 
min max", + "query": "**", + "count_min": 20, + }, + { + # id automatically generated from hash of constraints + "count_max": 3, + }, + { + "id": "max_txt_files", + "query": "*.txt", + "count_max": 5, + }, + { + "eval": "def test_custom(snapshot):\n return False", + }, + ], + } + ) + success, report = djfval.validate( + "tests/data/filesets/fileset1", + manifest, + verbose=True, + raise_err=False, ) - assert result["status"] + failed_constraints = [item["constraint_id"] for item in report] + failed_rules = [item["rule"] for item in report] + assert not success + assert isinstance(report, list) + assert "count_max" in failed_constraints + assert "max_txt_files" not in failed_rules diff --git a/tests/test_path_utils.py b/tests/test_path_utils.py new file mode 100644 index 0000000..0a2889d --- /dev/null +++ b/tests/test_path_utils.py @@ -0,0 +1,86 @@ +import os +import pytest +from datajoint_file_validator.path_utils import find_matching_paths + + +@pytest.fixture +def example0_paths(): + """ + flags = (glob.GLOBSTAR | glob.K | glob.X) + glob.glob('**', flags=flags) + """ + return set( + [ + "2021-10-02/", + "2021-10-02/subject1_frame1.png", + "2021-10-02/subject1_frame2.png", + "2021-10-02/obs.md", + "2021-10-02/subject1_frame3.png", + "2021-10-02/subject1_frame7.png", + "2021-10-02/subject1_frame0.png", + "2021-10-02/foo/", + "2021-10-02/foo/bar.txt", + "2021-10-02/subject1_frame4.png", + "2021-10-02/subject1_frame6.png", + "2021-10-02/subject1_frame5.png", + "obs.md", + "2021-10-01/", + "2021-10-01/subject1_frame1.png", + "2021-10-01/subject1_frame2.png", + "2021-10-01/obs.txt", + "2021-10-01/subject1_frame3.png", + "2021-10-01/subject1_frame0.png", + "2021-10-01/subject1_frame4.png", + "2021-10-01/subject1_frame5.png", + "README.txt", + ] + ) + + +def test_example0_paths(example0_paths): + assert set(find_matching_paths(example0_paths, "**")) == example0_paths + assert set(find_matching_paths(example0_paths, ["**"])) == example0_paths + assert not 
set(find_matching_paths(example0_paths, "./**")) + assert not set(find_matching_paths(example0_paths, "./*")) + + assert set(find_matching_paths(example0_paths, "**/*.md")) == { + "obs.md", + "2021-10-02/obs.md", + } + assert not set(find_matching_paths(example0_paths, "./**.md")) + assert set(find_matching_paths(example0_paths, "**/*.txt")) == { + "2021-10-01/obs.txt", + "2021-10-02/foo/bar.txt", + "README.txt", + } + assert set(find_matching_paths(example0_paths, "*/*/*.txt")) == { + "2021-10-02/foo/bar.txt", + } + assert set(find_matching_paths(example0_paths, "*/**/*.txt")) == { + "2021-10-01/obs.txt", + "2021-10-02/foo/bar.txt", + } + + assert set( + find_matching_paths(example0_paths, "2021-10-0*/subject1_frame*.png") + ) == { + "2021-10-01/subject1_frame0.png", + "2021-10-01/subject1_frame1.png", + "2021-10-01/subject1_frame2.png", + "2021-10-01/subject1_frame3.png", + "2021-10-01/subject1_frame4.png", + "2021-10-01/subject1_frame5.png", + "2021-10-02/subject1_frame0.png", + "2021-10-02/subject1_frame1.png", + "2021-10-02/subject1_frame2.png", + "2021-10-02/subject1_frame3.png", + "2021-10-02/subject1_frame4.png", + "2021-10-02/subject1_frame5.png", + "2021-10-02/subject1_frame6.png", + "2021-10-02/subject1_frame7.png", + } + + assert set(find_matching_paths(example0_paths, "*/")) == { + "2021-10-01/", + "2021-10-02/", + } diff --git a/tests/test_query.py b/tests/test_query.py new file mode 100644 index 0000000..5884119 --- /dev/null +++ b/tests/test_query.py @@ -0,0 +1,53 @@ +import pytest +import datajoint_file_validator as djfval + + +class TestGlobQuery: + def _glob_query(self, pattern, ss): + filtered_snapshot = djfval.query.GlobQuery(pattern).filter(ss) + return [item["path"] for item in filtered_snapshot] + + def test_glob_query(self): + fileset_path = "tests/data/filesets/fileset1" + ss = djfval.snapshot.create_snapshot(fileset_path) + ss_paths = [item["path"] for item in ss] + + assert set(self._glob_query("**", ss)) == set(ss_paths) + assert 
self._glob_query("2021-10-02", ss) == ["2021-10-02"] + assert set(self._glob_query("2021-10-02/*", ss)) == set( + [ + "2021-10-02/subject1_frame1.png", + "2021-10-02/subject1_frame2.png", + "2021-10-02/obs.md", + "2021-10-02/subject1_frame3.png", + "2021-10-02/subject1_frame7.png", + "2021-10-02/subject1_frame0.png", + "2021-10-02/foo", + "2021-10-02/subject1_frame4.png", + "2021-10-02/subject1_frame6.png", + "2021-10-02/subject1_frame5.png", + ] + ) + assert set(self._glob_query("2021-10-02/**", ss)) == set( + [ + "2021-10-02/subject1_frame1.png", + "2021-10-02/subject1_frame2.png", + "2021-10-02/obs.md", + "2021-10-02/subject1_frame3.png", + "2021-10-02/subject1_frame7.png", + "2021-10-02/subject1_frame0.png", + "2021-10-02/foo", + "2021-10-02/foo/bar.txt", + "2021-10-02/subject1_frame4.png", + "2021-10-02/subject1_frame6.png", + "2021-10-02/subject1_frame5.png", + ] + ) + assert set(self._glob_query("**/*.txt", ss)) == set( + ["2021-10-02/foo/bar.txt", "2021-10-01/obs.txt", "README.txt"] + ) + assert set(self._glob_query("*.txt", ss)) == set(["README.txt"]) + assert set(self._glob_query("*/**/*.txt", ss)) == set( + ["2021-10-02/foo/bar.txt", "2021-10-01/obs.txt"] + ) + assert set(self._glob_query("*/*.txt", ss)) == set(["2021-10-01/obs.txt"]) \ No newline at end of file diff --git a/tests/test_snapshot.py b/tests/test_snapshot.py index 2cd97c4..e63b4e2 100644 --- a/tests/test_snapshot.py +++ b/tests/test_snapshot.py @@ -4,16 +4,36 @@ import datajoint_file_validator as djfval -@pytest.mark.parametrize( - "fileset_path", - ( - "tests/data/filesets/fileset0", - "tests/data/filesets/fileset0/2021-10-01_poses.csv", - ), -) -def test_can_snapshot_to_cls(fileset_path): - snapshot = djfval.snapshot._snapshot_to_cls(fileset_path) - assert isinstance(snapshot, list) - for item in snapshot: - assert isinstance(item, djfval.snapshot.FileMetadata) - dicts = [item.asdict() for item in snapshot] +def test_snapshot_fileset1(): + fileset_path = "tests/data/filesets/fileset1" + 
files = djfval.snapshot.create_snapshot(fileset_path) + assert isinstance(files, list) + for item in files: + assert isinstance(item, dict) + paths = [item["path"] for item in files] + assert set(paths) == set( + [ + "2021-10-02", + "2021-10-02/subject1_frame1.png", + "2021-10-02/subject1_frame2.png", + "2021-10-02/obs.md", + "2021-10-02/subject1_frame3.png", + "2021-10-02/subject1_frame7.png", + "2021-10-02/subject1_frame0.png", + "2021-10-02/foo", + "2021-10-02/foo/bar.txt", + "2021-10-02/subject1_frame4.png", + "2021-10-02/subject1_frame6.png", + "2021-10-02/subject1_frame5.png", + "obs.md", + "2021-10-01", + "2021-10-01/subject1_frame1.png", + "2021-10-01/subject1_frame2.png", + "2021-10-01/obs.txt", + "2021-10-01/subject1_frame3.png", + "2021-10-01/subject1_frame0.png", + "2021-10-01/subject1_frame4.png", + "2021-10-01/subject1_frame5.png", + "README.txt", + ] + )