Added .offset parameter to enterprise dataloader #2579

Merged · 8 commits · Sep 6, 2023
deeplake/__init__.py (5 changes: 4 additions & 1 deletion)

@@ -69,8 +69,11 @@
     "deepcopy",
     "like",
     "list",
-    "ingest",
+    "ingest_classification",
+    "ingest_coco",
+    "ingest_yolo",
+    "ingest_kaggle",
     "ingest_dataframe",
     "ingest_huggingface",
     "compressions",
     "htypes",
deeplake/api/tests/test_api.py (6 changes: 3 additions & 3 deletions)

@@ -90,7 +90,6 @@ def test_persist(ds_generator):

     ds2 = ds_generator()

-    ds2.storage["dataset_meta.json"] == ds_new.storage["dataset_meta.json"]
     assert len(ds2) == 4
     assert_array_equal(ds2.label.numpy(), np.array([[1], [2], [3], [4]]))

@@ -983,7 +982,6 @@ def test_dataset_deepcopy(path, hub_token, num_workers, progressbar):
     dest_path = "_".join((path, "dest1"))

     src_ds = deeplake.empty(src_path, overwrite=True, token=hub_token)
-    # dest_ds = deeplake.empty(dest_path, overwrite=True, token=hub_token)

     with src_ds:
         src_ds.info.update(key=0)

@@ -1922,7 +1920,9 @@ def test_dataset_copy(
     [
         ("local_ds_generator", "local_path", "hub_cloud_dev_token"),
         pytest.param(
-            "s3_ds_generator", "s3_path", "hub_cloud_dev_token",
+            "s3_ds_generator",
+            "s3_path",
+            "hub_cloud_dev_token",
             marks=pytest.mark.slow,
         ),
         pytest.param(
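The first hunk above removes a bare comparison that was presumably meant to be an assertion: without `assert`, the `==` expression is evaluated and its result thrown away, so the line could never fail the test. A minimal illustration of the pitfall (the dicts are stand-ins, not deeplake objects):

    meta_a = {"version": 1}
    meta_b = {"version": 2}

    meta_a == meta_b          # evaluates to False, result silently discarded
    assert meta_a == meta_b   # raises AssertionError, the form a test needs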
deeplake/api/tests/test_info.py (2 changes: 0 additions & 2 deletions)

@@ -130,8 +130,6 @@ def test_update_reference_manually(local_ds_generator):
         l.append(99)
         ds.info.update()

-    ds = local_ds_generator()
-
     assert l == [1, 2, 3, 99]
deeplake/api/tests/test_meta.py (2 changes: 0 additions & 2 deletions)

@@ -35,8 +35,6 @@ def test_subsequent_updates(local_ds_generator):
     assert len(ds) == 10
     assert ds.tensor.shape == (10, 100, 100)

-    ds = local_ds_generator()
-
     with local_ds_generator() as ds:
         for _ in range(5):
             ds.tensor.append(np.ones((100, 200)))
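Both deletions remove the same kind of dead store: in test_meta.py the fresh `ds` binding is immediately replaced by the `with local_ds_generator() as ds:` line, and in test_info.py the assertion that follows only reads `l`, never `ds`. A sketch of the pattern, with a hypothetical `make_ds` factory:

    def use_dataset(make_ds):
        ds = make_ds()             # dead store: never read before rebinding
        with make_ds() as ds:      # this binding shadows the one above
            ds.tensor.append(...)  # only this `ds` is ever used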
deeplake/api/tests/test_reset.py (14 changes: 7 additions & 7 deletions)

@@ -54,10 +54,10 @@ def test_load_corrupt_dataset(path):
     save_head = ds.pending_commit_id

     with pytest.raises(DatasetCorruptError):
-        ds = deeplake.load(path, access_method=access_method)
+        deeplake.load(path, access_method=access_method)

     with pytest.raises(ReadOnlyModeError):
-        ds = deeplake.load(
+        deeplake.load(
             path, read_only=True, access_method=access_method, reset=True
         )

@@ -116,7 +116,7 @@ def test_load_corrupted_branch(local_path):
     save_head = ds.pending_commit_id

     with pytest.raises(DatasetCorruptError):
-        ds = deeplake.load(f"{local_path}@alt")
+        deeplake.load(f"{local_path}@alt")

     ds = deeplake.load(f"{local_path}@alt", reset=True)
     verify_reset_on_checkout(ds, "alt", main_2, save_head, {"abc": [[1], [2]]})

@@ -131,10 +131,10 @@ def test_load_corrupted_branch(local_path):
     save_head = ds.pending_commit_id

     with pytest.raises(DatasetCorruptError):
-        ds = deeplake.load(f"{local_path}@alt")
+        deeplake.load(f"{local_path}@alt")

     with pytest.raises(DatasetCorruptError):
-        ds = deeplake.load(f"{local_path}@{save_head}")
+        deeplake.load(f"{local_path}@{save_head}")

     ds = deeplake.load(f"{local_path}@alt", reset=True)
     verify_reset_on_checkout(ds, "alt", alt_2, save_head, {"abc": [[1], [2], [3], [4]]})

@@ -200,10 +200,10 @@ def test_load_corrupt_dataset_with_no_commits(local_path):
     corrupt_ds(ds, "abc", 1)

     with pytest.raises(DatasetCorruptError):
-        ds = deeplake.load(local_path)
+        deeplake.load(local_path)

     with pytest.raises(ReadOnlyModeError):
-        ds = deeplake.load(local_path, read_only=True, reset=True)
+        deeplake.load(local_path, read_only=True, reset=True)

     ds = deeplake.load(local_path, reset=True)
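All seven replacements in this file follow one pattern: inside a `pytest.raises` block, binding the return value is dead code. For the test to pass the call must raise, and a call that raises never returns a value to assign, so the name can never be bound. Roughly (`load_corrupt` is a stand-in, not a deeplake API):

    import pytest

    def load_corrupt():
        raise ValueError("dataset is corrupt")

    with pytest.raises(ValueError):
        result = load_corrupt()  # never bound: the call raises first

    # `result` is still undefined here; dropping the assignment makes that explicit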
deeplake/api/tests/test_update_samples.py (5 changes: 2 additions & 3 deletions)

@@ -51,9 +51,8 @@ def _make_update_assert_equal(
     # this is necessary because `expected` uses `aslist=True` to handle dynamic cases.
     # with `aslist=False`, this wouldn't be necessary.
     expected_value = value
-    if hasattr(value, "__len__"):
-        if len(value) == 1:
-            expected_value = value[0]
+    if hasattr(value, "__len__") and len(value) == 1:
+        expected_value = value[0]

     # make updates
     tensor[index] = value
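Collapsing the nested conditionals is behavior-preserving because `and` short-circuits: `len(value)` is evaluated only after `hasattr(value, "__len__")` has already passed, exactly as in the nested form. For instance:

    value = 5  # an int has no __len__
    if hasattr(value, "__len__") and len(value) == 1:
        value = value[0]  # skipped: len() is never called, so no TypeError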
deeplake/api/tests/test_video.py (4 changes: 2 additions & 2 deletions)

@@ -111,7 +111,7 @@ def test_video_timestamps(vstream_path, hub_token):
     ds = deeplake.load(vstream_path, read_only=True, token=hub_token)

     with pytest.raises(ValueError):
-        stamps = ds.mp4_videos[:2].timestamps
+        ds.mp4_videos[:2].timestamps

     stamps = ds.large_video[0, 12000:1199:-100].timestamps

@@ -131,7 +131,7 @@ def test_video_exception(local_ds):
     with local_ds as ds:
         ds.create_tensor("abc")
         with pytest.raises(Exception):
-            stamps = ds.abc.timestamps
+            ds.abc.timestamps


@pytest.mark.skipif(
deeplake/auto/structured/dataframe.py (2 changes: 1 addition & 1 deletion)

@@ -58,7 +58,7 @@ def _get_most_frequent_image_extension(self, fn_iterator):

     if len(fn_iterator) == 0:
         raise IngestionError(
-            f"Cannot determine the most frequent image compression because no valid image files were provided."
+            "Cannot determine the most frequent image compression because no valid image files were provided."
         )

     supported_image_extensions = tuple(
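This change, like the matching ones in deeplake/cli/test_cli.py and deeplake/core/tensor.py below, drops the `f` prefix from strings that contain no `{}` placeholders. Such f-strings run fine but promise interpolation that never happens (flake8 reports them as F541):

    path = "images/flower.png"
    msg_plain = "no valid image files were provided"  # nothing to interpolate
    msg_fmt = f"invalid image file: {path}"           # the prefix is earned here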
deeplake/cli/test_cli.py (2 changes: 1 addition & 1 deletion)

@@ -27,5 +27,5 @@ def test_cli_auth(hub_cloud_dev_credentials, hub_cloud_dev_token, method):
 def test_bad_token():
     runner = CliRunner()

-    result = runner.invoke(login, f"-t abcd")
+    result = runner.invoke(login, "-t abcd")
     assert isinstance(result.exception, LoginException)
deeplake/constants.py (3 changes: 2 additions & 1 deletion)

@@ -159,7 +159,8 @@
     "gcp://",
     "gs://",
     "az://",
-    "azure://" "gdrive://",
+    "azure://",
+    "gdrive://",
 )

 _ENABLE_HUB_SUB_DATASETS = False
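Unlike the cosmetic f-string cleanups, this hunk fixes a real bug: adjacent string literals in Python are concatenated at compile time, so the missing comma silently fused two URL schemes into one bogus entry, and `gdrive://` paths were never recognized. A quick demonstration:

    prefixes = ("az://", "azure://" "gdrive://")
    print(prefixes)  # ('az://', 'azure://gdrive://'): two items, not three

    # a gdrive path therefore failed any startswith() check against the tuple
    print("gdrive://folder/file".startswith(prefixes))  # False before the fix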
deeplake/core/compression.py (2 changes: 0 additions & 2 deletions)

@@ -157,8 +157,6 @@ def compress_bytes(
     if not buffer:
         return b""
     if compression == "lz4":
-        if not buffer:
-            return b""
         return numcodecs.lz4.compress(buffer)
     else:
         raise SampleCompressionError(
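The deleted check was unreachable: any empty `buffer` already returned `b""` at the top of the function, so the lz4 branch only ever sees non-empty input. Schematically (with `lz4_compress` standing in for the numcodecs call):

    def compress_bytes(buffer: bytes, compression: str) -> bytes:
        if not buffer:
            return b""  # the sole exit for empty input
        if compression == "lz4":
            # buffer is guaranteed non-empty here; a second guard is dead code
            return lz4_compress(buffer)
        raise ValueError(f"unsupported compression: {compression}")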
deeplake/core/dataset/dataset.py (2 changes: 1 addition & 1 deletion)

@@ -1939,7 +1939,7 @@ def _send_branch_creation_event(self, *args, **kwargs):
     def _send_branch_deletion_event(self, *args, **kwargs):
         """overridden in DeepLakeCloudDataset"""

-    def _first_load_init(self):
+    def _first_load_init(self, verbose=True):
         """overridden in DeepLakeCloudDataset"""

     @property
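Widening the no-op base stub keeps its signature in step with the DeepLakeCloudDataset override, so callers can pass `verbose` on any dataset and plain datasets simply ignore it. The shape of the pattern, with an illustrative override body (the real cloud implementation is not shown in this diff):

    class Dataset:
        def _first_load_init(self, verbose=True):
            """overridden in DeepLakeCloudDataset"""  # base class: deliberate no-op

    class DeepLakeCloudDataset(Dataset):
        def _first_load_init(self, verbose=True):
            if verbose:
                print("loading cloud dataset...")  # hypothetical body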
deeplake/core/tensor.py (6 changes: 3 additions & 3 deletions)

@@ -1350,7 +1350,7 @@ def dict(self, fetch_chunks: bool = False):
     def list(self, fetch_chunks: bool = False):
         """Return list data. Only applicable for tensors with 'list' base htype."""
         if self.base_htype != "list":
-            raise Exception(f"Only supported for list tensors.")
+            raise Exception("Only supported for list tensors.")

         if self.ndim == 1:
             return list(self.numpy(fetch_chunks=fetch_chunks))

@@ -1360,14 +1360,14 @@ def list(self, fetch_chunks: bool = False):
     def path(self, fetch_chunks: bool = False):
         """Return path data. Only applicable for linked tensors"""
         if not self.is_link:
-            raise Exception(f"Only supported for linked tensors.")
+            raise Exception("Only supported for linked tensors.")
         assert isinstance(self.chunk_engine, LinkedChunkEngine)
         return self.chunk_engine.path(self.index, fetch_chunks=fetch_chunks)

     def creds_key(self):
         """Return path data. Only applicable for linked tensors"""
         if not self.is_link:
-            raise Exception(f"Only supported for linked tensors.")
+            raise Exception("Only supported for linked tensors.")
         if self.index.values[0].subscriptable() or len(self.index.values) > 1:
             raise ValueError("_linked_sample can be used only on exatcly 1 sample.")
         assert isinstance(self.chunk_engine, LinkedChunkEngine)
deeplake/core/transform/test_transform.py (2 changes: 1 addition & 1 deletion)

@@ -138,7 +138,7 @@ def add_image(sample_in, samples_out):

 @deeplake.compute
 def add_images(i, sample_out):
-    for i in range(5):
+    for _ in range(5):
         image = deeplake.read(get_dummy_data_path("images/flower.png"))
         sample_out.append({"image": image})
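The old loop variable shadowed the function's `i` argument, clobbering it for the rest of the body; renaming it to `_` signals that the counter is intentionally unused:

    def add_images(i, sample_out):
        for i in range(5):  # before: the argument `i` is 4 after this loop
            pass
        for _ in range(5):  # after: `_` leaves the argument `i` intact
            pass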
deeplake/core/transform/transform.py (2 changes: 1 addition & 1 deletion)

@@ -323,7 +323,7 @@ def my_fn(sample_in: Any, samples_out, my_arg0, my_arg1=0):
                 index=index,
                 sample=sample,
                 samples_processed=samples_processed,
-                suggest=suggest,
+                suggest=suggest,
             ) from e
         finally:
             reload_and_rechunk(

(The two suggest=suggest, lines differ only in leading whitespace; the page scrape drops the indentation.)
deeplake/enterprise/convert_to_libdeeplake.py (7 changes: 3 additions & 4 deletions)

@@ -211,8 +211,7 @@ def dataset_to_libdeeplake(hub2_dataset):
     commit_id = hub2_dataset.pending_commit_id
     libdeeplake_dataset.checkout(commit_id)
     slice_ = hub2_dataset.index.values[0].value
-    if slice_ != slice(None):
-        if isinstance(slice_, tuple):
-            slice_ = list(slice_)
-        libdeeplake_dataset = libdeeplake_dataset[slice_]
+    if slice_ != slice(None) and isinstance(slice_, tuple):
+        slice_ = list(slice_)
+    libdeeplake_dataset = libdeeplake_dataset[slice_]
     return libdeeplake_dataset
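The tuple-to-list conversion matters because array-style __getitem__ implementations usually give tuples and lists different meanings: a tuple indexes several axes at once, while a list selects multiple elements along one axis. NumPy makes the distinction concrete:

    import numpy as np

    a = np.arange(12).reshape(3, 4)
    print(a[(1, 2)])  # tuple: multi-axis index, equivalent to a[1, 2] -> 6
    print(a[[1, 2]])  # list: fancy indexing, rows 1 and 2, shape (2, 4)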