From b678875f429208d660e8e69e46ee5d06ebc33a87 Mon Sep 17 00:00:00 2001
From: Levon Ghukasyan
Date: Wed, 8 May 2024 10:18:43 +0000
Subject: [PATCH 1/2] added handling of object dtype in decode method data

---
 deeplake/core/storage/gcs.py      |  4 +++-
 deeplake/enterprise/dataloader.py | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/deeplake/core/storage/gcs.py b/deeplake/core/storage/gcs.py
index a20cab7d22..8713994999 100644
--- a/deeplake/core/storage/gcs.py
+++ b/deeplake/core/storage/gcs.py
@@ -528,5 +528,7 @@ def get_object_from_full_url(self, url: str):
 
     def get_creds(self):
         d = self.scoped_credentials.get_token_info()
-        d["expiration"] = self.expiration or ""
+        d["expiration"] = (
+            self.expiration if hasattr(self, "expiration") and self.expiration else ""
+        )
         return d
diff --git a/deeplake/enterprise/dataloader.py b/deeplake/enterprise/dataloader.py
index a62844b79f..4b519d7df4 100644
--- a/deeplake/enterprise/dataloader.py
+++ b/deeplake/enterprise/dataloader.py
@@ -801,6 +801,29 @@ def __get_indra_dataloader(
             info=info,
         )
 
+    def _fill_sample_info_tensors(
+        self,
+        dataset,
+        sample_info_tensors,
+        json_tensors,
+        list_tensors,
+    ):
+        for tensor_name in sample_info_tensors:
+            tensor = dataset._get_tensor_from_root(tensor_name)
+            if len(tensor) == 0:
+                raise EmptyTensorError(
+                    f" the dataset has an empty tensor {tensor_name}, pytorch dataloader can't be created."
+                    f" Please either populate the tensor or pass tensors argument to .pytorch that excludes this"
+                    f" tensor."
+                )
+            meta = tensor.meta
+            if meta.htype == "json":
+                json_tensors.append(tensor_name)
+            elif meta.htype == "list":
+                list_tensors.append(tensor_name)
+            elif meta.htype == "tag":
+                list_tensors.append(tensor_name)
+
     def __iter__(self):
         if self._dataloader is None:
             dataset = self.dataset
@@ -825,6 +848,9 @@ def __iter__(self):
             sample_info_tensors, tensor_info_tensors = find_additional_tensors_and_info(
                 dataset, data_tensors
             )
+            self._fill_sample_info_tensors(
+                dataset, sample_info_tensors, json_tensors, list_tensors
+            )
             tensors.extend(sample_info_tensors)
             htype_dict, ndim_dict, tensor_info_dict = get_htype_ndim_tensor_info_dicts(
                 dataset, data_tensors, tensor_info_tensors

From 155e0405b93c8aa93676ed592366b1464a4fade7 Mon Sep 17 00:00:00 2001
From: Levon Ghukasyan
Date: Wed, 8 May 2024 17:04:44 +0000
Subject: [PATCH 2/2] merged common logic

---
 deeplake/enterprise/dataloader.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/deeplake/enterprise/dataloader.py b/deeplake/enterprise/dataloader.py
index 4b519d7df4..0b0ab8c81e 100644
--- a/deeplake/enterprise/dataloader.py
+++ b/deeplake/enterprise/dataloader.py
@@ -819,9 +819,7 @@ def _fill_sample_info_tensors(
             meta = tensor.meta
             if meta.htype == "json":
                 json_tensors.append(tensor_name)
-            elif meta.htype == "list":
-                list_tensors.append(tensor_name)
-            elif meta.htype == "tag":
+            elif meta.htype in ["list", "tag"]:
                 list_tensors.append(tensor_name)
 
     def __iter__(self):