From 9b5e2b97ec93df2395e8e0e131b1fba173dda28a Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Tue, 9 May 2023 17:08:22 +0530 Subject: [PATCH 1/6] init --- deeplake/core/dataset/dataset.py | 3 +++ deeplake/core/transform/test_transform.py | 23 ++++++++++++++++++++ deeplake/core/transform/transform_dataset.py | 17 +++++++++++---- deeplake/core/transform/transform_tensor.py | 5 ++--- deeplake/util/class_label.py | 8 +++++-- 5 files changed, 47 insertions(+), 9 deletions(-) diff --git a/deeplake/core/dataset/dataset.py b/deeplake/core/dataset/dataset.py index dea37e2668..39ce2272cf 100644 --- a/deeplake/core/dataset/dataset.py +++ b/deeplake/core/dataset/dataset.py @@ -2812,6 +2812,9 @@ def append( if not isinstance(sample, dict): raise SampleAppendingError() + if skip_ok and append_empty: + raise ValueError("Only one of `skip_ok` and `append_empty` can be True.") + skipped_tensors = [k for k in tensors if k not in sample] if skipped_tensors and not skip_ok and not append_empty: raise KeyError( diff --git a/deeplake/core/transform/test_transform.py b/deeplake/core/transform/test_transform.py index 52030154f8..c1c6dc8493 100644 --- a/deeplake/core/transform/test_transform.py +++ b/deeplake/core/transform/test_transform.py @@ -13,6 +13,7 @@ from deeplake.util.check_installation import ray_installed from deeplake.util.exceptions import ( AllSamplesSkippedError, + EmptyTensorError, InvalidOutputDatasetError, TransformError, ) @@ -1549,3 +1550,25 @@ def upload(stuff, ds): np.testing.assert_array_equal(ds2.xyz.numpy(), ds.xyz.numpy()) ds2.delete() + + +def test_ds_append_empty(local_ds): + @deeplake.compute + def upload(stuff, ds): + ds.append(stuff, append_empty=True) + + with local_ds as ds: + ds.create_tensor("images", htype="image", sample_compression="png") + ds.create_tensor("label1", htype="class_label") + ds.create_tensor("label2", htype="class_label") + + samples = [{"images": np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8), "label1": 1} for _ in range(20)] + + upload().eval(samples, ds, num_workers=TRANSFORM_TEST_NUM_WORKERS) + + with pytest.raises(EmptyTensorError): + ds.label2.numpy() + + ds.label2.append(1) + + np.testing.assert_array_equal(ds.label2[:20].numpy(), np.array([]).reshape((20, 0))) diff --git a/deeplake/core/transform/transform_dataset.py b/deeplake/core/transform/transform_dataset.py index 8d73aac4b8..b700473350 100644 --- a/deeplake/core/transform/transform_dataset.py +++ b/deeplake/core/transform/transform_dataset.py @@ -1,6 +1,6 @@ +from deeplake.util.exceptions import SampleAppendError, TensorDoesNotExistError from deeplake.core.transform.transform_tensor import TransformTensor from deeplake.core.linked_tiled_sample import LinkedTiledSample -from deeplake.util.exceptions import SampleAppendError from deeplake.core.partial_sample import PartialSample from deeplake.core.linked_sample import LinkedSample from deeplake.core.sample import Sample @@ -59,11 +59,20 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def append(self, sample): + def append(self, sample, skip_ok=False, append_empty=False): + if skip_ok: + raise ValueError("`skip_ok` is not supported for `ds.append` in transforms. Use `skip_ok` parameter of the `eval` method instead.") + if len(set(map(len, (self[k] for k in sample)))) != 1: - raise ValueError("All tensors are expected to have the same length.") + raise ValueError( + "All tensors are expected to have the same length before `ds.append`." + ) - for k, v in sample.items(): + for k in self.tensors: + if k in sample: + v = sample[k] + elif append_empty: + v = None self[k].append(v) def item_added(self, item): diff --git a/deeplake/core/transform/transform_tensor.py b/deeplake/core/transform/transform_tensor.py index 747f9e5361..ec8f5fff03 100644 --- a/deeplake/core/transform/transform_tensor.py +++ b/deeplake/core/transform/transform_tensor.py @@ -112,10 +112,9 @@ def _verify_item(self, item): def append(self, item): """Adds an item to the tensor.""" + if self.is_group: + raise TensorDoesNotExistError(self.name) try: - if self.is_group: - raise TensorDoesNotExistError(self.name) - # optimization applicable only if extending self.non_numpy_only() diff --git a/deeplake/util/class_label.py b/deeplake/util/class_label.py index 040fd8d5c9..a862f13a7c 100644 --- a/deeplake/util/class_label.py +++ b/deeplake/util/class_label.py @@ -2,6 +2,7 @@ from typing import List from deeplake.util.hash import hash_str_to_int32 +from deeplake.util.exceptions import EmptyTensorError from deeplake.client.log import logger import numpy as np import deeplake @@ -84,8 +85,11 @@ def class_label_sync( label_tensor: str, hash_idx_map, ): - hashes = hash_tensor_sample.numpy().tolist() - idxs = convert_hash_to_idx(hashes, hash_idx_map) + try: + hashes = hash_tensor_sample.numpy().tolist() + idxs = convert_hash_to_idx(hashes, hash_idx_map) + except EmptyTensorError: + idxs = None samples_out[label_tensor].append(idxs) for tensor, temp_tensor in label_temp_tensors.items(): From 4cdc27fd14dd1b94876fcb2d221ea3e065cb26ec Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Tue, 9 May 2023 17:10:34 +0530 Subject: [PATCH 2/6] black --- deeplake/core/transform/test_transform.py | 11 +++++++---- deeplake/core/transform/transform_dataset.py | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/deeplake/core/transform/test_transform.py b/deeplake/core/transform/test_transform.py index c1c6dc8493..0cec71347a 100644 --- a/deeplake/core/transform/test_transform.py +++ b/deeplake/core/transform/test_transform.py @@ -1556,19 +1556,22 @@ def test_ds_append_empty(local_ds): @deeplake.compute def upload(stuff, ds): ds.append(stuff, append_empty=True) - + with local_ds as ds: ds.create_tensor("images", htype="image", sample_compression="png") ds.create_tensor("label1", htype="class_label") ds.create_tensor("label2", htype="class_label") - samples = [{"images": np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8), "label1": 1} for _ in range(20)] + samples = [ + {"images": np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8), "label1": 1} + for _ in range(20) + ] upload().eval(samples, ds, num_workers=TRANSFORM_TEST_NUM_WORKERS) with pytest.raises(EmptyTensorError): ds.label2.numpy() - + ds.label2.append(1) - + np.testing.assert_array_equal(ds.label2[:20].numpy(), np.array([]).reshape((20, 0))) diff --git a/deeplake/core/transform/transform_dataset.py b/deeplake/core/transform/transform_dataset.py index b700473350..0d379e1774 100644 --- a/deeplake/core/transform/transform_dataset.py +++ b/deeplake/core/transform/transform_dataset.py @@ -61,7 +61,9 @@ def __iter__(self): def append(self, sample, skip_ok=False, append_empty=False): if skip_ok: - raise ValueError("`skip_ok` is not supported for `ds.append` in transforms. Use `skip_ok` parameter of the `eval` method instead.") + raise ValueError( + "`skip_ok` is not supported for `ds.append` in transforms. Use `skip_ok` parameter of the `eval` method instead." + ) if len(set(map(len, (self[k] for k in sample)))) != 1: raise ValueError( From 98d1c237eb7a5c1b671ef28a7abf5ce43bef4272 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Tue, 9 May 2023 18:26:58 +0530 Subject: [PATCH 3/6] test --- deeplake/core/transform/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeplake/core/transform/test_transform.py b/deeplake/core/transform/test_transform.py index 0cec71347a..955d19f9a8 100644 --- a/deeplake/core/transform/test_transform.py +++ b/deeplake/core/transform/test_transform.py @@ -1554,7 +1554,7 @@ def upload(stuff, ds): def test_ds_append_empty(local_ds): @deeplake.compute - def upload(stuff, ds): + def upload(stuff, ds): ds.append(stuff, append_empty=True) with local_ds as ds: From 623248fd908cc1745a39d7a2854dcd0afd33070d Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Tue, 9 May 2023 18:27:20 +0530 Subject: [PATCH 4/6] black --- deeplake/core/transform/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeplake/core/transform/test_transform.py b/deeplake/core/transform/test_transform.py index 955d19f9a8..0cec71347a 100644 --- a/deeplake/core/transform/test_transform.py +++ b/deeplake/core/transform/test_transform.py @@ -1554,7 +1554,7 @@ def upload(stuff, ds): def test_ds_append_empty(local_ds): @deeplake.compute - def upload(stuff, ds): + def upload(stuff, ds): ds.append(stuff, append_empty=True) with local_ds as ds: From e3ac93bb0ef9780c566a64c49c3c9d9d228b93b5 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Tue, 9 May 2023 18:29:36 +0530 Subject: [PATCH 5/6] rm --- deeplake/core/dataset/dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/deeplake/core/dataset/dataset.py b/deeplake/core/dataset/dataset.py index 39ce2272cf..dea37e2668 100644 --- a/deeplake/core/dataset/dataset.py +++ b/deeplake/core/dataset/dataset.py @@ -2812,9 +2812,6 @@ def append( if not isinstance(sample, dict): raise SampleAppendingError() - if skip_ok and append_empty: - raise ValueError("Only one of `skip_ok` and `append_empty` can be True.") - skipped_tensors = [k for k in tensors if k not in sample] if skipped_tensors and not skip_ok and not append_empty: raise KeyError( From 747f0f3c0d3d8bb9626acc629900141a3c2683f8 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Tue, 9 May 2023 19:55:52 +0530 Subject: [PATCH 6/6] fix --- deeplake/core/transform/transform_dataset.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/deeplake/core/transform/transform_dataset.py b/deeplake/core/transform/transform_dataset.py index 0d379e1774..7c1affc355 100644 --- a/deeplake/core/transform/transform_dataset.py +++ b/deeplake/core/transform/transform_dataset.py @@ -72,10 +72,9 @@ def append(self, sample, skip_ok=False, append_empty=False): for k in self.tensors: if k in sample: - v = sample[k] + self[k].append(sample[k]) elif append_empty: - v = None - self[k].append(v) + self[k].append(None) def item_added(self, item): if isinstance(item, Sample):