Commit 3bcb44f

- changed DataStore.get_data_ids() to return an Iterator[Tuple[str, Optional[str]]]
- added DataDescriptor.open_params_schema

forman committed Jun 26, 2020
1 parent 36a8faf commit 3bcb44f
Showing 12 changed files with 59 additions and 40 deletions.
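Note: this change is breaking for callers that treat the items yielded by get_data_ids() as plain strings. A minimal sketch of how a call site adapts; the untyped `store` argument stands in for any concrete DataStore implementation:

```python
from typing import Set

def print_data_resources(store) -> None:
    # Each item is now a (data_id, title) 2-tuple, where title may be None.
    for data_id, title in store.get_data_ids():
        print(data_id, title or '<no title>')

def collect_data_ids(store) -> Set[str]:
    # Callers that only need the identifiers can simply drop the titles.
    return {data_id for data_id, _ in store.get_data_ids()}
```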
6 changes: 3 additions & 3 deletions test/cli/test_io.py
@@ -50,9 +50,9 @@ def test_data(self):
' Directory data store\n'
'\n'
'Data resources:\n'
-            ' cube-1-250-250.zarr\n'
-            ' cube-5-100-200.zarr\n'
-            ' cube.nc\n'
+            ' cube-1-250-250.zarr <no title>\n'
+            ' cube-5-100-200.zarr <no title>\n'
+            ' cube.nc <no title>\n'
'3 data resources found.\n',
result.stdout)

2 changes: 1 addition & 1 deletion test/core/store/accessors/test_gdf.py
@@ -16,7 +16,7 @@ def setUp(self) -> None:
})

def test_get_data_ids(self):
-        self.assertEqual({'cube_1', 'cube_2'}, set(self.data_store.get_data_ids()))
+        self.assertEqual({('cube_1', None), ('cube_2', None)}, set(self.data_store.get_data_ids()))

def test_open_data(self):
cube_1 = self.data_store.open_data('cube_1')
6 changes: 3 additions & 3 deletions test/core/store/stores/test_directory.py
@@ -74,8 +74,8 @@ def test_get_data_writer_ids(self):
def test_get_data_ids(self):
self.assertEqual(
{
-                'cube-1-250-250.zarr',
-                'cube-5-100-200.zarr',
-                'cube.nc'
+                ('cube-1-250-250.zarr', None),
+                ('cube-5-100-200.zarr', None),
+                ('cube.nc', None),
},
set(self.store.get_data_ids()))
2 changes: 1 addition & 1 deletion test/core/store/stores/test_memory.py
@@ -33,7 +33,7 @@ def test_get_type_ids(self):
self.assertEqual(('*',), self.store.get_type_ids())

def test_get_data_ids(self):
-        self.assertEqual({'cube_1', 'cube_2'}, set(self.store.get_data_ids()))
+        self.assertEqual({('cube_1', None), ('cube_2', None)}, set(self.store.get_data_ids()))

def test_has_data(self):
self.assertEqual(True, self.store.has_data('cube_1'))
8 changes: 4 additions & 4 deletions test/core/store/stores/test_s3.py
@@ -90,9 +90,9 @@ def test_write_and_read_and_delete(self):
self.store.write_data(dataset_2, data_id='cube-2.zarr')
self.store.write_data(dataset_3, data_id='cube-3.zarr')

-        self.assertEqual({'cube-1.zarr',
-                          'cube-2.zarr',
-                          'cube-3.zarr'},
+        self.assertEqual({('cube-1.zarr', None),
+                          ('cube-2.zarr', None),
+                          ('cube-3.zarr', None)},
set(self.store.get_data_ids()))

self.assertTrue(self.store.has_data('cube-1.zarr'))
@@ -124,7 +124,7 @@ def test_write_and_read_and_delete(self):

# Try deleting cube 1
self.store.delete_data('cube-1.zarr')
-        self.assertEqual({'cube-2.zarr', 'cube-3.zarr'},
+        self.assertEqual({('cube-2.zarr', None), ('cube-3.zarr', None)},
set(self.store.get_data_ids()))
self.assertFalse(self.store.has_data('cube-1.zarr'))

12 changes: 6 additions & 6 deletions xcube/cli/io.py
@@ -28,9 +28,9 @@
from xcube.constants import EXTENSION_POINT_DATA_WRITERS
from xcube.util.plugin import get_extension_registry

-_UNKNOWN_EXTENSION = "<unknown!>"

+_NO_TITLE = "<no title>"
+_NO_DESCRIPTION = "<no description>"
+_UNKNOWN_EXTENSION = "<unknown!>"


@click.command(name='list')
@@ -293,8 +293,8 @@ def _dump_store_writers(data_store: 'xcube.core.store.DataStore') -> int:

def _dump_store_data_ids(data_store: 'xcube.core.store.DataStore') -> int:
count = 0
-    for data_id in data_store.get_data_ids():
-        print(f' {data_id}')
+    for data_id, title in data_store.get_data_ids():
+        print(f' {data_id:>32s} {title or _NO_TITLE}')
count += 1
return count

@@ -313,8 +313,8 @@ def _dump_named_extensions(point: str, names: Sequence[str]) -> int:

def _dump_data_resources(data_store: 'xcube.core.store.DataStore') -> int:
count = 0
-    for data_id in data_store.get_data_ids():
-        print(f' {data_id}')
+    for data_id, title in data_store.get_data_ids():
+        print(f' {data_id:<32s} {title or _NO_TITLE}')
count += 1
return count

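The '<32s' and '>32s' format specs above pad the data identifier to a 32-character column so the titles line up ('<' left-aligns, '>' right-aligns). A quick self-contained illustration with arbitrary identifiers:

```python
_NO_TITLE = '<no title>'

for data_id, title in [('cube.nc', None), ('cube-1-250-250.zarr', 'A demo cube')]:
    # Left-align data_id in a 32-character field, then print the title
    # or the <no title> placeholder.
    print(f' {data_id:<32s} {title or _NO_TITLE}')
```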
18 changes: 9 additions & 9 deletions xcube/core/store/accessor.py
@@ -114,7 +114,7 @@ def get_data_accessor_predicate(type_id: str = None,
:param format_id: Optional data format identifier to be supported.
:param storage_id: Optional data storage identifier to be supported.
:return: A filter function.
-    :raise DataAccessorError: If an error occurs.
+    :raise DataStoreError: If an error occurs.
"""
if any((type_id, format_id, storage_id)):
def _predicate(extension: Extension) -> bool:
@@ -159,7 +159,7 @@ def get_open_data_params_schema(self, data_id: str = None) -> JsonObjectSchema:
:param data_id: An optional data resource identifier.
:return: The schema for the parameters in *open_params*.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""

@abstractmethod
@@ -172,7 +172,7 @@ def open_data(self, data_id: str, **open_params) -> xr.Dataset:
:param data_id: The data resource identifier.
:param open_params: Opener-specific parameters.
:return: An xarray.Dataset instance.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""


@@ -187,7 +187,7 @@ def delete_data(self, data_id: str):
Delete a data resource. Raises if *data_id* does not exist.
:param data_id: A data resource identifier known to exist.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""


@@ -206,7 +206,7 @@ def get_write_data_params_schema(self) -> JsonObjectSchema:
:meth:write_data(data resource, data_id, open_params).
:return: The schema for the parameters in *write_params*.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""

@abstractmethod
@@ -223,7 +223,7 @@ def write_data(self,
:param replace: Whether to replace an existing data resource.
:param write_params: Writer-specific parameters.
:return: The data resource identifier used to write the data resource.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""


@@ -239,7 +239,7 @@ def append_data_time_slice(self, data_id: str, time_slice: xr.Dataset):
:param data_id: The data resource identifier.
:param time_slice: The time slice data to be inserted. Must be compatible with the data resource.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""

@abstractmethod
@@ -250,7 +250,7 @@ def insert_data_time_slice(self, data_id: str, time_slice: Any, time_index: int):
:param data_id: The data resource identifier.
:param time_slice: The time slice data to be inserted. Must be compatible with the data resource.
:param time_index: The time index.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""

@abstractmethod
@@ -261,5 +261,5 @@ def replace_data_time_slice(self, data_id: str, time_slice: Any, time_index: int):
:param data_id: The data resource identifier.
:param time_slice: The time slice data to be inserted. Must be compatible with the data resource.
:param time_index: The time index.
-        :raise DataAccessorError: If an error occurs.
+        :raise DataStoreError: If an error occurs.
"""
25 changes: 20 additions & 5 deletions xcube/core/store/descriptor.py
@@ -28,6 +28,7 @@
from xcube.util.assertions import assert_given
from xcube.util.assertions import assert_in
from xcube.util.ipython import register_json_formatter
+from xcube.util.jsonschema import JsonObjectSchema

TYPE_ID_DATASET = 'dataset'
TYPE_ID_MULTI_LEVEL_DATASET = 'mldataset'
@@ -78,7 +79,8 @@ def __init__(self,
bbox: Tuple[float, float, float, float] = None,
spatial_res: float = None,
time_range: Tuple[Optional[str], Optional[str]] = None,
-                 time_period: str = None):
+                 time_period: str = None,
+                 open_params_schema: JsonObjectSchema = None):
assert_given(data_id, 'data_id')
assert_given(type_id, 'type_id')
self.data_id = data_id
@@ -88,6 +90,7 @@ def __init__(self,
self.spatial_res = spatial_res
self.time_range = tuple(time_range) if time_range else None
self.time_period = time_period
+        self.open_params_schema = open_params_schema

@classmethod
def from_dict(cls, d: Mapping[str, Any]) -> 'DatasetDescriptor':
@@ -101,6 +104,8 @@ def to_dict(self) -> Dict[str, Any]:
_copy_none_null_props(self, d, ['data_id', 'type_id',
'crs', 'bbox', 'spatial_res',
'time_range', 'time_period'])
+        if self.open_params_schema is not None:
+            d['open_params_schema'] = self.open_params_schema.to_dict()
return d


@@ -113,12 +118,21 @@ class DatasetDescriptor(DataDescriptor):
def __init__(self,
data_id: str,
type_id=TYPE_ID_DATASET,
+                 crs: str = None,
+                 bbox: Tuple[float, float, float, float] = None,
+                 spatial_res: float = None,
+                 time_range: Tuple[Optional[str], Optional[str]] = None,
+                 time_period: str = None,
                  dims: Mapping[str, int] = None,
                  data_vars: Sequence['VariableDescriptor'] = None,
-                 attrs: Mapping[str, any] = None,
-                 **kwargs):
-        assert_given(data_id, 'data_id')
-        super().__init__(data_id=data_id, type_id=type_id, **kwargs)
+                 attrs: Mapping[str, any] = None):
+        super().__init__(data_id=data_id,
+                         type_id=type_id,
+                         crs=crs,
+                         bbox=bbox,
+                         spatial_res=spatial_res,
+                         time_range=time_range,
+                         time_period=time_period)
self.dims = dict(dims) if dims else None
self.data_vars = list(data_vars) if data_vars else None
self.attrs = dict(attrs) if attrs else None
@@ -127,6 +141,7 @@ def from_dict(cls, d: Mapping[str, Any]) -> 'DatasetDescriptor':
def from_dict(cls, d: Mapping[str, Any]) -> 'DatasetDescriptor':
"""Create new instance from a JSON-serializable dictionary"""
# TODO: implement me
+        raise NotImplementedError()

def to_dict(self) -> Dict[str, Any]:
"""Convert into a JSON-serializable dictionary"""
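A short sketch of how the new open_params_schema slot can be populated and serialized. JsonObjectSchema is imported by this very commit; JsonStringSchema is assumed to live alongside it in xcube.util.jsonschema, and the 'variable_names' parameter is purely illustrative:

```python
from xcube.core.store.descriptor import DataDescriptor
from xcube.util.jsonschema import JsonObjectSchema, JsonStringSchema

# Hypothetical opener parameter: a comma-separated list of variable names.
schema = JsonObjectSchema(properties=dict(variable_names=JsonStringSchema()))

descriptor = DataDescriptor(data_id='cube.zarr',  # illustrative identifier
                            type_id='dataset',
                            open_params_schema=schema)

# to_dict() now serializes the schema alongside the other metadata.
print(descriptor.to_dict()['open_params_schema'])
```

Note that DatasetDescriptor no longer forwards **kwargs to the base class, so after this commit it does not accept open_params_schema itself; the new slot is only reachable through DataDescriptor.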
7 changes: 5 additions & 2 deletions xcube/core/store/store.py
@@ -132,15 +132,18 @@ def get_type_ids(cls) -> Tuple[str, ...]:
"""

@abstractmethod
-    def get_data_ids(self, type_id: str = None) -> Iterator[str]:
+    def get_data_ids(self, type_id: str = None) -> Iterator[Tuple[str, Optional[str]]]:
"""
Get an iterator over the data resource identifiers for the given type *type_id*.
If *type_id* is omitted, all data resource identifiers are returned.
If a store implementation supports only a single data type, it should verify that *type_id* is either None
or equal to that single data type.
-        :return: An iterator over the identifiers if data resources provided by this data store.
+        The returned iterator items are 2-tuples of the form (*data_id*, *title*), where *data_id*
+        is the actual data identifier and *title* is an optional, human-readable title for the data.
+        :return: An iterator over the identifiers and titles of data resources provided by this data store.
:raise DataStoreError: If an error occurs.
"""

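For store implementors the contract change is mechanical: yield 2-tuples instead of bare identifiers. A minimal sketch of a conforming implementation; the _registry backing dict is made up for illustration:

```python
from typing import Iterator, Optional, Tuple

class ToyDataStore:
    """Toy store illustrating only the new get_data_ids() contract."""

    def __init__(self):
        # Made-up backing data: data_id -> optional human-readable title.
        self._registry = {
            'cube-1.zarr': 'Monthly SST cube',
            'cube-2.zarr': None,
        }

    def get_data_ids(self, type_id: str = None) -> Iterator[Tuple[str, Optional[str]]]:
        # A single-type store should verify type_id here, per the docstring above.
        for data_id, title in self._registry.items():
            yield data_id, title
```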
4 changes: 2 additions & 2 deletions xcube/core/store/stores/directory.py
@@ -106,15 +106,15 @@ def get_data_store_params_schema(cls) -> JsonObjectSchema:
def get_type_ids(cls) -> Tuple[str, ...]:
return TYPE_ID_DATASET, TYPE_ID_MULTI_LEVEL_DATASET, TYPE_ID_GEO_DATA_FRAME

-    def get_data_ids(self, type_id: str = None) -> Iterator[str]:
+    def get_data_ids(self, type_id: str = None) -> Iterator[Tuple[str, Optional[str]]]:
self._assert_valid_type_id(type_id)
# TODO: Use os.walk(), which provides a generator rather than a list
for data_id in os.listdir(self._base_dir):
accessor_id_parts = self._get_accessor_id_parts(data_id, require=False)
if accessor_id_parts:
actual_type_id, _, _ = accessor_id_parts
if type_id is None or actual_type_id == type_id:
-                    yield data_id
+                    yield data_id, None

def has_data(self, data_id: str) -> bool:
assert_given(data_id, 'data_id')
5 changes: 3 additions & 2 deletions xcube/core/store/stores/memory.py
@@ -58,8 +58,9 @@ def get_data_store_params_schema(cls) -> JsonObjectSchema:
def get_type_ids(cls) -> Tuple[str, ...]:
return '*',

-    def get_data_ids(self, type_id: str = None) -> Iterator[str]:
-        return iter(self._data_dict.keys())
+    def get_data_ids(self, type_id: str = None) -> Iterator[Tuple[str, Optional[str]]]:
+        for data_id in self._data_dict.keys():
+            yield data_id, None

def has_data(self, data_id: str) -> bool:
assert_given(data_id, 'data_id')
4 changes: 2 additions & 2 deletions xcube/core/store/stores/s3.py
@@ -102,12 +102,12 @@ def get_data_store_params_schema(cls) -> JsonObjectSchema:
def get_type_ids(cls) -> Tuple[str, ...]:
return TYPE_ID_DATASET,

-    def get_data_ids(self, type_id: str = None) -> Iterator[str]:
+    def get_data_ids(self, type_id: str = None) -> Iterator[Tuple[str, Optional[str]]]:
prefix = self._bucket_name + '/'
first_index = len(prefix)
for item in self._s3_fs.listdir(self._bucket_name, detail=False):
if item.startswith(prefix):
-                yield item[first_index:]
+                yield item[first_index:], None

def has_data(self, data_id: str) -> bool:
path = self._resolve_data_id_to_path(data_id)
