Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for valid catalog directories. #136

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/hipscat_import/index/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from hipscat.catalog import Catalog
from hipscat.catalog.index.index_catalog_info import IndexCatalogInfo
from hipscat.io.validation import is_valid_catalog

from hipscat_import.runtime_arguments import RuntimeArguments

Expand Down Expand Up @@ -38,6 +39,8 @@ def _check_arguments(self):
if not self.include_hipscat_index and not self.include_order_pixel:
raise ValueError("At least one of include_hipscat_index or include_order_pixel must be True")

if not is_valid_catalog(self.input_catalog_path):
raise ValueError("input_catalog_path not a valid catalog")
self.input_catalog = Catalog.read_from_hipscat(catalog_path=self.input_catalog_path)

if self.compute_partition_size < 100_000:
Expand Down
8 changes: 5 additions & 3 deletions src/hipscat_import/margin_cache/margin_cache_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
from hipscat.catalog import Catalog
from hipscat.catalog.margin_cache.margin_cache_catalog_info import MarginCacheCatalogInfo
from hipscat.io import file_io
from hipscat.io.validation import is_valid_catalog

from hipscat_import.runtime_arguments import RuntimeArguments

Expand Down Expand Up @@ -33,8 +33,10 @@ def __post_init__(self):

def _check_arguments(self):
super()._check_arguments()
if not file_io.does_file_or_directory_exist(self.input_catalog_path):
raise FileNotFoundError("input_catalog_path not found on local storage")
if not self.input_catalog_path:
raise ValueError("input_catalog_path is required")
if not is_valid_catalog(self.input_catalog_path):
raise ValueError("input_catalog_path not a valid catalog")

self.catalog = Catalog.read_from_hipscat(self.input_catalog_path)

Expand Down
5 changes: 5 additions & 0 deletions src/hipscat_import/soap/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from hipscat.catalog.association_catalog.association_catalog import AssociationCatalogInfo
from hipscat.catalog.catalog_type import CatalogType
from hipscat.io.validation import is_valid_catalog

from hipscat_import.runtime_arguments import RuntimeArguments

Expand Down Expand Up @@ -33,11 +34,15 @@ def _check_arguments(self):
raise ValueError("object_catalog_dir is required")
if not self.object_id_column:
raise ValueError("object_id_column is required")
if not is_valid_catalog(self.object_catalog_dir):
raise ValueError("object_catalog_dir not a valid catalog")

if not self.source_catalog_dir:
raise ValueError("source_catalog_dir is required")
if not self.source_object_id_column:
raise ValueError("source_object_id_column is required")
if not is_valid_catalog(self.source_catalog_dir):
raise ValueError("source_catalog_dir not a valid catalog")

if self.compute_partition_size < 100_000:
raise ValueError("compute_partition_size must be at least 100_000")
Expand Down
8 changes: 8 additions & 0 deletions tests/hipscat_import/catalog/test_file_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ def test_unknown_file_type():
get_file_reader("unknown")


def test_file_exists(small_sky_dir):
    """Reading through ``CsvReader`` must fail for a missing path or a directory.

    Each case pairs an unusable input path with the fragment of the
    ``FileNotFoundError`` message the test expects to see.
    """
    failure_cases = (
        # A path that does not exist at all.
        ("foo_not_really_a_path", "File not found"),
        # A path that exists but is a directory rather than a file.
        (small_sky_dir, "Directory found at path"),
    )
    for bad_path, expected_message in failure_cases:
        with pytest.raises(FileNotFoundError, match=expected_message):
            # Pull the first chunk inside the context so the error is caught
            # whether it is raised by the call or by the first iteration.
            next(CsvReader().read(bad_path))


def test_csv_reader(small_sky_single_file):
"""Verify we can read the csv file into a single data frame."""
total_chunks = 0
Expand Down
11 changes: 11 additions & 0 deletions tests/hipscat_import/index/test_index_argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ def test_empty_required(tmp_path, small_sky_object_catalog):
output_catalog_name="small_sky_object_index",
)

## Input path is bad
with pytest.raises(ValueError, match="input_catalog_path"):
IndexArguments(
input_catalog_path="/foo",
indexing_column="id",
output_path=tmp_path,
output_catalog_name="small_sky_object_index",
overwrite=True,
)

## Indexing column is required.
with pytest.raises(ValueError, match="indexing_column "):
IndexArguments(
input_catalog_path=small_sky_object_catalog,
Expand Down
12 changes: 11 additions & 1 deletion tests/hipscat_import/margin_cache/test_arguments_margin_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,23 @@
def test_empty_required(tmp_path):
    """A missing or invalid ``input_catalog_path`` is rejected with ``ValueError``.

    Both cases supply the remaining arguments (threshold, output path and
    output catalog name), so the only thing at fault is the input catalog path.
    """
    ## Input catalog path is missing
    # Only the ValueError expectation is kept: the argument check now raises
    # ValueError for an absent path, so the older FileNotFoundError expectation
    # would make this test fail.
    with pytest.raises(ValueError, match="input_catalog_path"):
        MarginCacheArguments(
            margin_threshold=5.0,
            output_path=tmp_path,
            output_catalog_name="catalog_cache",
        )

    ## Input catalog path is bad
    with pytest.raises(ValueError, match="input_catalog_path"):
        MarginCacheArguments(
            input_catalog_path="/foo",
            margin_threshold=5.0,
            output_path=tmp_path,
            output_catalog_name="catalog_cache",
            overwrite=True,
        )


def test_margin_order_dynamic(small_sky_source_catalog, tmp_path):
"""Ensure we can dynamically set the margin_order"""
Expand Down
29 changes: 29 additions & 0 deletions tests/hipscat_import/soap/test_soap_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,35 @@ def test_empty_required(tmp_path, small_sky_object_catalog, small_sky_source_cat
)


def test_catalog_paths(tmp_path, small_sky_object_catalog, small_sky_source_catalog):
    """Object and source catalog directories that are not valid catalogs are rejected.

    Starts from a complete set of arguments and swaps in a bad path for one
    catalog directory at a time, expecting a ``ValueError`` that names the
    offending argument.
    """
    complete_kwargs = {
        "object_catalog_dir": small_sky_object_catalog,
        "object_id_column": "id",
        "source_catalog_dir": small_sky_source_catalog,
        "source_object_id_column": "object_id",
        "output_catalog_name": "small_sky_association",
        "output_path": tmp_path,
        "progress_bar": False,
        "overwrite": True,
    }

    ## Object catalog path is bad.
    bad_object_kwargs = {**complete_kwargs, "object_catalog_dir": "/foo"}
    with pytest.raises(ValueError, match="object_catalog_dir"):
        SoapArguments(**bad_object_kwargs)

    ## Source catalog path is bad.
    bad_source_kwargs = {**complete_kwargs, "source_catalog_dir": "/foo"}
    with pytest.raises(ValueError, match="source_catalog_dir"):
        SoapArguments(**bad_source_kwargs)


def test_compute_partition_size(tmp_path, small_sky_object_catalog, small_sky_source_catalog):
"""Test validation of compute_partition_size."""
with pytest.raises(ValueError, match="compute_partition_size"):
Expand Down