Skip to content
This repository has been archived by the owner on Jan 18, 2024. It is now read-only.

Commit

Permalink
Merge pull request #1 from ethho/dev-demo
Browse files Browse the repository at this point in the history
Demo approaches
  • Loading branch information
ethho committed Dec 6, 2023
2 parents bc39438 + 2d7f633 commit 68357d8
Show file tree
Hide file tree
Showing 170 changed files with 714 additions and 12 deletions.
16 changes: 5 additions & 11 deletions .github/workflows/tox.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
- name: Set up Python 3.11
uses: actions/setup-python@v2
with:
python-version: '3.7'
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: '3.8'
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: '3.9'
python-version: '3.11'
- name: Install Python dependencies
run: python -m pip install -q poetry tox
run: |
pipx install tox
pipx install poetry
- name: Run all tox tests
if: github.event_name != 'pull_request'
run: tox --
Expand Down
3 changes: 3 additions & 0 deletions datajoint_file_validator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from . import (
snapshot, validate, manifest, result
)
54 changes: 54 additions & 0 deletions datajoint_file_validator/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import typer
from typing_extensions import Annotated
from rich import print as rprint
from rich.console import Console
from rich.table import Table

# Shared Rich console; `show_table` prints its table through it.
console = Console()
# Typer application object; the functions below register on it via
# `@app.callback()` / `@app.command()`.
app = typer.Typer()


@app.callback()
def callback() -> None:
    """
    Welcome to datajoint-file-validator!
    """
    # Intentionally empty: the function only exists to be registered as the
    # application-level callback. The docstring above is left untouched
    # because Typer surfaces it as the CLI help text.


@app.command()
def show_table():
    # Demo command: render a fixed two-column table on the shared console.
    # (Documented with comments rather than a docstring so the generated
    # CLI help stays exactly as it was.)
    demo_rows = (
        ("Rick", "Portal Gun"),
        ("Morty", "Plumbus"),
    )
    table = Table("Name", "Item")
    for owner, item in demo_rows:
        table.add_row(owner, item)
    console.print(table)


@app.command()
def open_file(path: str):
    """
    Open a file at PATH in the default app.
    """
    # Announce first, then hand the path to typer.launch with locate=True.
    announcement = f":left_speech_bubble: Opening file {path}"
    rprint(announcement)
    typer.launch(path, locate=True)


@app.command()
def read_file(path: Annotated[typer.FileText, typer.Option()]):
    """
    Reads lines from a file at PATH.
    """
    # `path` is declared as typer.FileText, i.e. it is iterated here as an
    # open text file, one line per iteration.
    for line in path:
        # Print the line itself (the original interpolated `path`, repeating
        # the file object for every line). The trailing newline is removed
        # because rprint already terminates its output with one.
        rprint(f"Config line: {line.rstrip(chr(10))}")


@app.command()
def main(name: str, lastname: str = "", formal: bool = False):
    """
    Say hi to NAME, optionally with a --lastname.
    If --formal is used, say hi very formally.
    """
    # Pick the greeting text first, then print it once.
    greeting = (
        f"Good day Ms. {name} {lastname}."
        if formal
        else f"Hello {name} {lastname}"
    )
    rprint(greeting)
31 changes: 31 additions & 0 deletions datajoint_file_validator/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import yaml
from cerberus import schema_registry
from cerberus import rules_set_registry


def _example_registry_add():
    """Example: register a named schema and named rules sets with Cerberus.

    Adds the schema ``"non-system user"`` to ``schema_registry`` and the
    rules sets ``"boolean"`` and ``"booleans"`` to ``rules_set_registry``.
    The schema dictionaries built alongside only illustrate how the
    registered names are referenced; they are not used or returned.
    """
    uid_bounds = {"uid": {"min": 1000, "max": 0xFFFF}}
    schema_registry.add("non-system user", uid_bounds)

    # A schema that refers to the registered definition by name.
    transfer_schema = {
        party: {"schema": "non-system user", "allow_unknown": True}
        for party in ("sender", "receiver")
    }

    boolean_rules_sets = (
        ("boolean", {"type": "boolean"}),
        ("booleans", {"valuesrules": "boolean"}),
    )
    rules_set_registry.extend(boolean_rules_sets)

    # A schema that refers to a registered rules set by name.
    flags_schema = {"foo": "booleans"}


def _example_schema_from_yaml():
    """Example: load a Cerberus schema from YAML text and validate a document.

    The schema requires ``name`` to be a string and ``age`` to be an integer
    of at least 10; the sample document has ``age`` 5, so validation is
    expected to fail. Nothing is returned; the function only demonstrates
    the calls.
    """
    # Imported locally because the module only imports the registries from
    # cerberus at the top of the file.
    from cerberus import Validator

    # The nested ``type``/``min`` keys must be indented under their field
    # name, otherwise YAML reads them as top-level keys.
    schema_text = """
name:
  type: string
age:
  type: integer
  min: 10
"""
    schema = yaml.safe_load(schema_text)
    document = {"name": "Little Joe", "age": 5}

    # The original referenced an undefined ``v``; create the validator here.
    v = Validator()
    v.validate(document, schema)
    # False
    v.errors
    # {'age': ['min value is 10']}
31 changes: 31 additions & 0 deletions datajoint_file_validator/manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from dataclasses import dataclass
from typing import Dict, List, Any
from .snapshot import PathLike

# Placeholder binding for the name; it is rebound by the `Manifest`
# dataclass defined further down in this module.
Manifest = Any


@dataclass
class Constraint:
    """One constraint of a rule; evaluates to True or False for a fileset."""

    # Name of the operator this constraint applies.
    operator: str


@dataclass
class Rule:
    """One named rule of a manifest, applied to a fileset."""

    # Short identifier of the rule.
    name: str
    # Human-readable explanation of what the rule checks.
    description: str
    # Path the rule is rooted at.
    root: PathLike
    # Constraints that belong to this rule.
    constraints: List[Constraint]



@dataclass
class Manifest:
    """Definition of a fileset type: metadata plus the rules it must satisfy."""

    # Name of the fileset type.
    name: str
    # Version string of the manifest.
    version: str
    # Human-readable description of the fileset type.
    description: str
    # Rules that make up the manifest.
    rules: List[Rule]
46 changes: 46 additions & 0 deletions datajoint_file_validator/manifests/demo_bob_lab_frames_v0.1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
id: https://raw.githubusercontent.com/datajoint/datajoint-file-validator/main/manifests/demo_bob_lab_frames_v0.1
name: bob_lab_frames
version: 0.1.0
description: Example of a fileset type for a Bob Lab fileset
author:
name: Ethan Ho
email: ethan@datajoint.com
rules:
- name: num_files_constraint
description: Check that there are between 1 and 5 files in the directory
count_min: 1
count_max: 5
- name: num_files_per_subdir
description: >
Check that each subdirectory has between 1 and 100 files
and that subdirectories only contain .mp4 files.
query:
path: "/*"
type: directory
rules:
- name: count_per_subdir
# Here, the query ("/**") is relative to the sub-directory
# instead of the root.
query: "/"
count_min: 1
count_max: 100
- name: check_file_type
# query: "/**" is implicit here
regex: "^.*\\.mp4$"
- name: file_type_in_subdir
description: Check that each subdirectory has exactly one obs.md file
query:
# Instead of path, regex, or type, we can write a custom query function
# that filters the items that are passed to the constraints.
eval: |
def get_subdirs(results: list[dict]) -> list[bool]:
    """Return one flag per entry: True where the entry's type is 'directory'."""
    # The original put `if ... else` after the `for` clause, which is not
    # valid comprehension syntax; the comparison already yields the boolean.
    return [r['type'] == 'directory' for r in results]
rules:
- query: "/obs.md"
description: "There is exactly one obs.md"
- query: "obs.md"
description: "Another way to write the above"
- query: "obs.md"
description: "Yet another way to write this rule"
count_min: 1
count_max: 1
42 changes: 42 additions & 0 deletions datajoint_file_validator/manifests/demo_dlc_v0.1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
id: https://raw.githubusercontent.com/datajoint/datajoint-file-validator/main/manifests/demo_dlc_v0.1
name: demo_dlc
version: 0.1.0
description: DeepLabCut demo fileset type
author:
name: Ethan Ho
email: ethan@datajoint.com
rules:
- name: "Min total files"
description: "Check that there are at least 6 files anywhere in the fileset"
# This is the default query, which returns every file, even if it is in
# a subdirectory.
query: "/**"
count_min: 6
- name: "Count .mp4 files"
description: "Check that there are at least 4 .mp4 files and their total size is at least 2.0K"
query: "/**/*.mp4"
# This rule has two constraints, which both must be satisfied for the rule
# to pass.
count_min: 4
total_size_min: 2.0K

# More complex rules below
- name: "Only .mp4 and .csv at top level"
# This query returns only files at the top level, and will not consider
# files in subdirectories.
query: "/*"
# Note that this regex will not match subdirectories.
regex: ".*\\.mp4$|.*\\.csv$"
- name: "Every video file must have a corresponding csv file"
description: "Check that every video file has a corresponding csv file"
# query: "/**" is implicit here
# Instead of other constraints, we can use eval to write a custom constraint function.
eval: |
def check_one_to_one(results: list[dict]) -> bool:
    """Return True when there are as many 'mp4' entries as 'csv' entries."""
    mp4_total = sum(1 for entry in results if entry['extension'] == 'mp4')
    csv_total = sum(1 for entry in results if entry['extension'] == 'csv')
    return mp4_total == csv_total
- name: "Size constraints"
description: "Check that the size of all files are within a certain range"
total_size_min: 2.0K
total_size_max: 10000
81 changes: 81 additions & 0 deletions datajoint_file_validator/manifests_schemas/v0.1.0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
id:
type: string
required: true
name:
type: string
required: true
version:
type: string
required: true
description:
type: string
required: true
author:
type: dict
schema:
name:
type: string
required: true
email:
type: string
required: true
required: true
rules:
type: dict
schema:
num_files_constraint:
type: dict
schema:
query:
type: string
required: true
count_min:
type: integer
required: true
count_max:
type: integer
required: true
required: true
num_files_per_subdir:
type: dict
schema:
description:
type: string
required: true
query:
type: string
required: true
rules:
type: list
schema:
type: dict
schema:
name:
type: string
required: true
query:
type: string
required: true
count_min:
type: integer
required: true
count_max:
type: integer
required: true
required: true
required: true
file_type_in_subdir:
type: dict
schema:
description:
type: string
required: true
query:
type: dict
schema:
eval:
type: string
required: true
required: true
required: true
required: true
13 changes: 13 additions & 0 deletions datajoint_file_validator/result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from dataclasses import dataclass
from typing import Dict, Any
import cerberus


@dataclass
class ValidationResult:
    """Outcome of a validation run: a pass/fail flag plus the reported errors."""

    # True when validation passed, False otherwise.
    status: bool
    # Errors as reported by the validator (structure is validator-defined).
    errors: Any

    @classmethod
    def from_validator(cls, v: "cerberus.Validator"):
        """Build a result from a validator that has already been run.

        Args:
            v: Validator whose ``errors`` attribute holds the errors of the
                last validation.

        Returns:
            A ``ValidationResult`` carrying ``v.errors``. ``status`` is
            taken from ``v.status`` when the validator defines it, and is
            otherwise True exactly when ``v.errors`` is empty.
        """
        errors = v.errors
        # NOTE(review): cerberus.Validator does not appear to expose a
        # ``status`` attribute, so reading it unconditionally raises
        # AttributeError. Fall back to "no errors means success" while still
        # honouring validators that do provide ``status``.
        status = getattr(v, "status", not errors)
        return cls(status=status, errors=errors)
Loading

0 comments on commit 68357d8

Please sign in to comment.