-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* unpin sphinx versions (#134)
* Verification pipeline boilerplate (#126)
* Verification pipeline boilerplate
* Input catalog options.
* Contribution docs updates.
* Merge recent changes (#135)
* Use minimum stage name formatting.
* Run copier.
* Add a tad more context for failure email.
* Pin pandas version
* Remove benchmarks for now.
* unpin sphinx versions (#134)
---------
Co-authored-by: Max West <110124344+maxwest-uw@users.noreply.github.com>
---------
Co-authored-by: Max West <110124344+maxwest-uw@users.noreply.github.com>
* Check for valid catalog directories. (#136)
* Check for valid catalog directories.
* Hit uncovered file check.
---------
Co-authored-by: Max West <110124344+maxwest-uw@users.noreply.github.com>
- Loading branch information
1 parent
08e41a1
commit 24b1f4e
Showing
16 changed files
with
255 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
"""Utility to hold all arguments required throughout verification pipeline""" | ||
|
||
from dataclasses import dataclass, field | ||
from typing import List, Optional | ||
|
||
from hipscat.catalog import Catalog | ||
|
||
from hipscat_import.runtime_arguments import RuntimeArguments | ||
|
||
|
||
@dataclass
class VerificationArguments(RuntimeArguments):
    """Data class for holding verification arguments.

    The catalog to inspect can be supplied either as a path
    (``input_catalog_path``) or as an already-loaded ``Catalog`` object
    (``input_catalog``). Whichever of the two is missing is filled in from
    the other while the arguments are being checked.
    """

    ## Input
    input_catalog_path: str = ""
    """Path to an existing catalog that will be inspected."""
    input_catalog: Optional[Catalog] = None
    """In-memory representation of a catalog. If not provided, it will be loaded
    from the input_catalog_path."""

    ## Verification options
    field_distribution_cols: List[str] = field(default_factory=list)
    """List of fields to get the overall distribution for. e.g. ["ra", "dec"].
    Should be valid columns in the parquet files."""

    def __post_init__(self):
        """Check the arguments as soon as the dataclass has been constructed."""
        self._check_arguments()

    def _check_arguments(self):
        """Run the base-class checks, then resolve the input catalog.

        Raises:
            ValueError: if neither ``input_catalog_path`` nor ``input_catalog``
                is provided.
        """
        super()._check_arguments()

        # Test the optional catalog object against None explicitly, so that a
        # catalog the caller did supply is never treated as missing (or
        # reloaded from disk) merely because it evaluates as falsy.
        catalog_provided = self.input_catalog is not None

        if not self.input_catalog_path and not catalog_provided:
            raise ValueError("input catalog is required (either input_catalog_path or input_catalog)")
        if not catalog_provided:
            self.input_catalog = Catalog.read_from_hipscat(catalog_path=self.input_catalog_path)
        if not self.input_catalog_path:
            # Only the in-memory catalog was given; take the path from it.
            self.input_catalog_path = self.input_catalog.catalog_path

    def additional_runtime_provenance_info(self) -> dict:
        """Return the verification-specific fields to record as provenance.

        Returns:
            dict with the pipeline name, the input catalog path (as a string)
            and the requested field distribution columns.
        """
        return {
            "pipeline": "verification pipeline",
            "input_catalog_path": str(self.input_catalog_path),
            "field_distribution_cols": self.field_distribution_cols,
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
"""Run pass/fail checks and generate verification report of existing hipscat table.""" | ||
|
||
from hipscat_import.verification.arguments import VerificationArguments | ||
|
||
|
||
def run(args):
    """Run the verification pipeline.

    Args:
        args: pipeline configuration; expected to be a
            ``VerificationArguments`` instance.

    Raises:
        TypeError: if ``args`` is falsy (e.g. ``None``) or is not a
            ``VerificationArguments`` instance.
        NotImplementedError: always, once the arguments pass the type checks.
    """
    # The emptiness check comes first so that a missing argument gets its
    # own, more specific message.
    args_missing = not args
    if args_missing:
        raise TypeError("args is required and should be type VerificationArguments")

    has_expected_type = isinstance(args, VerificationArguments)
    if not has_expected_type:
        raise TypeError("args must be type VerificationArguments")

    # implement everything else.
    raise NotImplementedError("Verification not yet implemented.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
tests/hipscat_import/verification/test_run_verification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import pytest | ||
|
||
from hipscat_import.verification.arguments import VerificationArguments | ||
import hipscat_import.verification.run_verification as runner | ||
|
||
|
||
def test_bad_args():
    """The runner raises TypeError for both missing and wrongly-typed arguments."""
    missing_args = None
    wrong_type_args = {"output_catalog_name": "bad_arg_type"}

    for bad_args in (missing_args, wrong_type_args):
        with pytest.raises(TypeError, match="VerificationArguments"):
            runner.run(bad_args)
|
||
|
||
def test_no_implementation(tmp_path, small_sky_object_catalog):
    """Well-formed arguments currently end in NotImplementedError from the runner."""
    constructor_kwargs = {
        "input_catalog_path": small_sky_object_catalog,
        "output_path": tmp_path,
        "output_catalog_name": "small_sky_object_verification_report",
    }
    verification_args = VerificationArguments(**constructor_kwargs)

    with pytest.raises(NotImplementedError, match="not yet implemented"):
        runner.run(verification_args)
80 changes: 80 additions & 0 deletions
80
tests/hipscat_import/verification/test_verification_arguments.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
"""Tests of argument validation""" | ||
|
||
|
||
import pytest | ||
from hipscat.catalog import Catalog | ||
|
||
from hipscat_import.verification.arguments import VerificationArguments | ||
|
||
|
||
def test_none():
    """Constructing the arguments with nothing supplied raises ValueError."""
    no_kwargs = {}
    with pytest.raises(ValueError):
        VerificationArguments(**no_kwargs)
|
||
|
||
def test_empty_required(tmp_path):
    """Output arguments are given but no input catalog is; expect a ValueError."""
    ## Input path is missing
    output_only_kwargs = {
        "output_path": tmp_path,
        "output_catalog_name": "small_sky_object_verification_report",
    }
    with pytest.raises(ValueError, match="input_catalog_path"):
        VerificationArguments(**output_only_kwargs)
|
||
|
||
def test_invalid_paths(tmp_path, small_sky_object_catalog):
    """Construction succeeds with real paths and raises FileNotFoundError with bogus ones."""
    report_name = "small_sky_object_verification_report"

    ## Prove that it works with required args
    valid_kwargs = {
        "input_catalog_path": small_sky_object_catalog,
        "output_path": tmp_path,
        "output_catalog_name": report_name,
    }
    VerificationArguments(**valid_kwargs)

    ## Bad input path
    # NOTE(review): output_path is the bogus "path" here as well, so this case
    # may not isolate the input path as the cause of the error -- confirm.
    bogus_kwargs = {
        "input_catalog_path": "path",
        "output_path": "path",
        "output_catalog_name": report_name,
    }
    with pytest.raises(FileNotFoundError):
        VerificationArguments(**bogus_kwargs)
|
||
|
||
def test_good_paths(tmp_path, small_sky_object_catalog):
    """With valid paths, the stored input and output locations match what was passed."""
    expected_output = str(tmp_path)
    constructor_kwargs = {
        "input_catalog_path": small_sky_object_catalog,
        "output_path": tmp_path,
        "output_catalog_name": "small_sky_object_verification_report",
    }
    verification_args = VerificationArguments(**constructor_kwargs)

    assert verification_args.input_catalog_path == small_sky_object_catalog
    assert str(verification_args.output_path) == expected_output
    assert str(verification_args.tmp_path).startswith(expected_output)
|
||
|
||
def test_catalog_object(tmp_path, small_sky_object_catalog):
    """An in-memory Catalog is passed instead of a path; the path is filled in from it."""
    loaded_catalog = Catalog.read_from_hipscat(catalog_path=small_sky_object_catalog)
    expected_output = str(tmp_path)
    constructor_kwargs = {
        "input_catalog": loaded_catalog,
        "output_path": tmp_path,
        "output_catalog_name": "small_sky_object_verification_report",
    }
    verification_args = VerificationArguments(**constructor_kwargs)

    assert verification_args.input_catalog_path == small_sky_object_catalog
    assert str(verification_args.output_path) == expected_output
    assert str(verification_args.tmp_path).startswith(expected_output)
|
||
|
||
def test_provenance_info(small_sky_object_catalog, tmp_path):
    """The provenance runtime args include the verification input catalog path."""
    constructor_kwargs = {
        "input_catalog_path": small_sky_object_catalog,
        "output_path": tmp_path,
        "output_catalog_name": "small_sky_object_verification_report",
    }
    verification_args = VerificationArguments(**constructor_kwargs)

    provenance = verification_args.provenance_info()
    assert "input_catalog_path" in provenance["runtime_args"]