2998 tests run, 1671 passed, 1319 skipped, 8 failed.
Annotations
Check failure on line 130 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_deepmemory_train_and_cancel
deeplake.util.exceptions.DatasetHandlerError: Path hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy is empty or does not exist. Cannot delete.
Raw output
path = 'hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy'
force = True, large_ok = True, creds = {}
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
verbose = False
@staticmethod
@spinner
def delete(
path: Union[str, pathlib.Path],
force: bool = False,
large_ok: bool = False,
creds: Optional[Union[dict, str]] = None,
token: Optional[str] = None,
verbose: bool = False,
) -> None:
"""Deletes a dataset at a given path.
Args:
path (str, pathlib.Path): The path to the dataset to be deleted.
force (bool): Delete data regardless of whether
it looks like a deeplake dataset. All data at the path will be removed if set to ``True``.
large_ok (bool): Delete datasets larger than 1GB. Disabled by default.
creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the dataset at the path.
- If 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token' are present, these take precedence over credentials present in the environment or in credentials file. Currently only works with s3 paths.
- It supports 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token', 'endpoint_url', 'aws_region', 'profile_name' as keys.
- If 'ENV' is passed, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets. For datasets connected to hub cloud, specifying 'ENV' will override the credentials fetched from Activeloop and use local ones.
token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
verbose (bool): If True, logs will be printed. Defaults to ``False``.
Raises:
DatasetHandlerError: If a Dataset does not exist at the given path and ``force = False``.
UserNotLoggedInException: When user is not authenticated.
NotImplementedError: When attempting to delete a managed view.
ValueError: If a version is specified in the path.
Warning:
This is an irreversible operation. Data once deleted cannot be recovered.
"""
path, address = process_dataset_path(path)
if address:
raise ValueError(
"deeplake.delete does not accept version address in the dataset path."
)
if creds is None:
creds = {}
feature_report_path(
path, "delete", {"Force": force, "Large_OK": large_ok}, token=token
)
try:
qtokens = ["/.queries/", "\\.queries\\"]
for qt in qtokens:
if qt in path:
raise NotImplementedError(
"Deleting managed views by path is not supported. Load the source dataset and do `ds.delete_view(id)` instead."
)
try:
ds = deeplake.load(path, verbose=False, token=token, creds=creds)
except UserNotLoggedInException:
raise UserNotLoggedInException from None
> ds.delete(large_ok=large_ok)
deeplake/api/dataset.py:905:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
During handling of the above exception, another exception occurred:
capsys = <_pytest.capture.CaptureFixture object at 0x7ffaeaec3ac0>
corpus_query_relevances_copy = ('hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel', ['0-dimensional biomaterials lack induc...]], [['32587939', 1]], ...], 'hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@pytest.mark.slow
@pytest.mark.flaky(reruns=3)
@pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
def test_deepmemory_train_and_cancel(
capsys,
corpus_query_relevances_copy,
hub_cloud_dev_token,
):
corpus, queries, relevances, _ = corpus_query_relevances_copy
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
with pytest.raises(ValueError):
# When embedding_function is provided neither in the constructor nor in the train method
job_id = db.deep_memory.train(
queries=queries,
relevance=relevances,
)
job_id = db.deep_memory.train(
queries=queries,
relevance=relevances,
embedding_function=embedding_fn,
)
# cancelling right after starting the job
cancelled = db.deep_memory.cancel(job_id)
assert cancelled == True
# deleting the job
deleted = db.deep_memory.delete(job_id)
assert deleted == True
# when embedding function is provided in the constructor
deeplake.deepcopy(
corpus,
corpus + "_copy",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
db = VectorStore(
path=corpus + "_copy",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=DummyEmbedder,
)
job_id = db.deep_memory.train(
queries=queries,
relevance=relevances,
embedding_function=embedding_fn,
)
# TODO: Investigate why it is flaky
# # cancelling right after starting the job
# cancelled = db.deep_memory.cancel(job_id)
# assert cancelled == True
# # deleting the job
# deleted = db.deep_memory.delete(job_id)
# assert deleted == True
# cancelled = db.deep_memory.cancel("non-existent-job-id")
# out_str = capsys.readouterr()
# error_str = (
# "Job with job_id='non-existent-job-id' was not cancelled!\n "
# "Error: Entity non-existent-job-id does not exist.\n"
# )
# assert cancelled == False
# assert out_str.out == error_str
> deeplake.delete(
corpus + "_copy", force=True, large_ok=True, token=hub_cloud_dev_token
)
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:130:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
path = 'hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy'
force = True, large_ok = True, creds = {}
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
verbose = False
@staticmethod
@spinner
def delete(
path: Union[str, pathlib.Path],
force: bool = False,
large_ok: bool = False,
creds: Optional[Union[dict, str]] = None,
token: Optional[str] = None,
verbose: bool = False,
) -> None:
"""Deletes a dataset at a given path.
Args:
path (str, pathlib.Path): The path to the dataset to be deleted.
force (bool): Delete data regardless of whether
it looks like a deeplake dataset. All data at the path will be removed if set to ``True``.
large_ok (bool): Delete datasets larger than 1GB. Disabled by default.
creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the dataset at the path.
- If 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token' are present, these take precedence over credentials present in the environment or in credentials file. Currently only works with s3 paths.
- It supports 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token', 'endpoint_url', 'aws_region', 'profile_name' as keys.
- If 'ENV' is passed, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets. For datasets connected to hub cloud, specifying 'ENV' will override the credentials fetched from Activeloop and use local ones.
token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
verbose (bool): If True, logs will be printed. Defaults to ``False``.
Raises:
DatasetHandlerError: If a Dataset does not exist at the given path and ``force = False``.
UserNotLoggedInException: When user is not authenticated.
NotImplementedError: When attempting to delete a managed view.
ValueError: If a version is specified in the path.
Warning:
This is an irreversible operation. Data once deleted cannot be recovered.
"""
path, address = process_dataset_path(path)
if address:
raise ValueError(
"deeplake.delete does not accept version address in the dataset path."
)
if creds is None:
creds = {}
feature_report_path(
path, "delete", {"Force": force, "Large_OK": large_ok}, token=token
)
try:
qtokens = ["/.queries/", "\\.queries\\"]
for qt in qtokens:
if qt in path:
raise NotImplementedError(
"Deleting managed views by path is not supported. Load the source dataset and do `ds.delete_view(id)` instead."
)
try:
ds = deeplake.load(path, verbose=False, token=token, creds=creds)
except UserNotLoggedInException:
raise UserNotLoggedInException from None
ds.delete(large_ok=large_ok)
if verbose:
logger.info(f"{path} dataset deleted successfully.")
except Exception as e:
if force:
base_storage = storage_provider_from_path(
path=path,
creds=creds,
read_only=False,
token=token,
)
if len(base_storage) == 0:
> raise DatasetHandlerError(
f"Path {path} is empty or does not exist. Cannot delete."
)
E deeplake.util.exceptions.DatasetHandlerError: Path hub://testingacc2/tmpa8bf_test_deepmemory_test_deepmemory_train_and_cancel_copy is empty or does not exist. Cannot delete.
deeplake/api/dataset.py:917: DatasetHandlerError
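Note on this first failure: the backend's BadRequestException ("object generator can't be used in 'await' expression") is swallowed by the broad `except Exception` in `deeplake.delete`; because `force=True` and the raw storage behind the hub path is empty, the fallback raises `DatasetHandlerError` and masks the root cause. A minimal sketch of one fix direction, chaining the fallback error onto the original exception so CI surfaces the 400 — this is a fragment of the `delete` body shown above, a suggestion rather than the current implementation:

    try:
        ds = deeplake.load(path, verbose=False, token=token, creds=creds)
        ds.delete(large_ok=large_ok)
    except Exception as e:
        if force:
            base_storage = storage_provider_from_path(
                path=path, creds=creds, read_only=False, token=token
            )
            if len(base_storage) == 0:
                # "from e" keeps the original BadRequestException in the chain
                raise DatasetHandlerError(
                    f"Path {path} is empty or does not exist. Cannot delete."
                ) from e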
Check failure on line 750 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_hub_dataset_suffix_bug
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_ds = Dataset(path='hub://testingacc2/tmp6189_test_api_test_hub_dataset_suffix_bug', tensors=[])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@pytest.mark.slow
def test_hub_dataset_suffix_bug(hub_cloud_ds, hub_cloud_dev_token):
# creating dataset with similar name but some suffix removed from end
ds = deeplake.dataset(hub_cloud_ds.path[:-1], token=hub_cloud_dev_token)
# need to delete because it's a different path (won't be auto cleaned up)
> ds.delete()
deeplake/api/tests/test_api.py:750:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
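The error string appended to every 400 in this run is a standard Python asyncio TypeError: it is raised when code awaits a plain generator instead of a coroutine, which points at the Activeloop backend service rather than this repository. A self-contained local reproduction of the exact message, for illustration only:

    import asyncio

    def respond():
        # a plain generator object, not a coroutine or awaitable
        yield {"description": "Invalid Request. One or more request parameters is incorrect."}

    async def handler():
        await respond()  # TypeError: object generator can't be used in 'await' expression

    asyncio.run(handler())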
Check failure on line 976 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_dataset_rename[True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7fd8aaedc9a0>
path = 'hub://testingacc2/tmp6189_test_api_test_dataset_rename-True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = True
@pytest.mark.parametrize(
("ds_generator", "path", "hub_token"),
[
("local_ds_generator", "local_path", "hub_cloud_dev_token"),
pytest.param(
"s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
),
pytest.param(
"gcs_ds_generator",
"gcs_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"azure_ds_generator",
"azure_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"hub_cloud_ds_generator",
"hub_cloud_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
],
indirect=True,
)
@pytest.mark.parametrize("convert_to_pathlib", [True, False])
def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
ds = ds_generator()
ds.create_tensor("abc")
ds.abc.append([1, 2, 3, 4])
new_path = "_".join([path, "renamed"])
ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
with pytest.raises(RenameError):
ds.rename("wrongfolder/new_ds")
if str(ds.path).startswith("hub://"):
with pytest.raises(BadRequestException):
ds.rename(ds.path)
else:
with pytest.raises(PathNotEmptyException):
ds.rename(ds.path)
> ds = deeplake.rename(ds.path, new_path, token=hub_token)
deeplake/api/tests/test_api.py:976:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/dataset.py:842: in rename
deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 976 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_dataset_rename[False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7fd7bbae5d00>
path = 'hub://testingacc2/tmp6189_test_api_test_dataset_rename-False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = False
@pytest.mark.parametrize(
("ds_generator", "path", "hub_token"),
[
("local_ds_generator", "local_path", "hub_cloud_dev_token"),
pytest.param(
"s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
),
pytest.param(
"gcs_ds_generator",
"gcs_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"azure_ds_generator",
"azure_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"hub_cloud_ds_generator",
"hub_cloud_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
],
indirect=True,
)
@pytest.mark.parametrize("convert_to_pathlib", [True, False])
def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
ds = ds_generator()
ds.create_tensor("abc")
ds.abc.append([1, 2, 3, 4])
new_path = "_".join([path, "renamed"])
ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
with pytest.raises(RenameError):
ds.rename("wrongfolder/new_ds")
if str(ds.path).startswith("hub://"):
with pytest.raises(BadRequestException):
ds.rename(ds.path)
else:
with pytest.raises(PathNotEmptyException):
ds.rename(ds.path)
> ds = deeplake.rename(ds.path, new_path, token=hub_token)
deeplake/api/tests/test_api.py:976:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/dataset.py:842: in rename
deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
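Both `test_dataset_rename` parametrizations fail at the same point: for hub paths, `deeplake.rename` finishes by deleting the old path, so the delete regression breaks rename as well. The chain, condensed from the frames above:

    deeplake.rename(ds.path, new_path, token=hub_token)
    # deeplake/api/dataset.py:842   -> deeplake.delete(old_path, token=token, creds=creds)
    # deeplake/api/dataset.py:905   -> ds.delete(large_ok=large_ok)
    # deeplake_cloud_dataset.py:246 -> self.client.delete_dataset_entry(self.org_id, self.ds_name)
    # deeplake/client/client.py:148 -> check_response_status(response)  # <Response [400]>
    # deeplake/client/utils.py:56   -> raise BadRequestException(message)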
Check failure on line 1036 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_dataset_deepcopy[True-2-hub_cloud_path-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
path = 'hub://testingacc2/tmp6189_test_api_test_dataset_deepcopy-True-2-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
num_workers = 2, progressbar = True
@pytest.mark.parametrize(
"path,hub_token",
[
["local_path", "hub_cloud_dev_token"],
pytest.param("hub_cloud_path", "hub_cloud_dev_token", marks=pytest.mark.slow),
],
indirect=True,
)
@pytest.mark.parametrize("num_workers", [2])
@pytest.mark.parametrize("progressbar", [True])
def test_dataset_deepcopy(path, hub_token, num_workers, progressbar):
src_path = "_".join((path, "src1"))
dest_path = "_".join((path, "dest1"))
src_ds = deeplake.empty(src_path, overwrite=True, token=hub_token)
with src_ds:
src_ds.info.update(key=0)
src_ds.create_tensor("a", htype="image", sample_compression="png")
src_ds.create_tensor("b", htype="class_label")
src_ds.create_tensor("c")
src_ds.create_tensor("d", dtype=bool)
src_ds.d.info.update(key=1)
src_ds["a"].append(np.ones((28, 28), dtype="uint8"))
src_ds["b"].append(0)
dest_ds = deeplake.deepcopy(
src_path,
dest_path,
token=hub_token,
num_workers=num_workers,
progressbar=progressbar,
)
assert list(dest_ds.tensors) == ["a", "b", "c", "d"]
assert dest_ds.a.meta.htype == "image"
assert dest_ds.a.meta.sample_compression == "png"
assert dest_ds.b.meta.htype == "class_label"
assert dest_ds.c.meta.htype == None
assert dest_ds.d.dtype == bool
assert dest_ds.info.key == 0
assert dest_ds.d.info.key == 1
for tensor in dest_ds.meta.tensors:
assert_array_equal(src_ds[tensor].numpy(), dest_ds[tensor].numpy())
> deeplake.delete(src_path, token=hub_token)
deeplake/api/tests/test_api.py:1036:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
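`test_dataset_deepcopy` fails only during cleanup: the deepcopy and every assertion pass before `deeplake.delete(src_path)` hits the 400. A hypothetical best-effort teardown helper (`safe_delete` is not part of deeplake) that would keep otherwise-green tests from failing on a control-plane outage, at the cost of leaking test datasets:

    import warnings

    import deeplake
    from deeplake.util.exceptions import BadRequestException, DatasetHandlerError

    def safe_delete(path: str, token: str) -> None:
        """Best-effort teardown: warn instead of failing if the delete endpoint errors."""
        try:
            deeplake.delete(path, force=True, large_ok=True, token=token)
        except (BadRequestException, DatasetHandlerError) as e:
            warnings.warn(f"cleanup of {path} failed, dataset left behind: {e}")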
Check failure on line 52 in deeplake/api/tests/test_views.py
github-actions / JUnit Test Report
test_views.test_view_token_only
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_path = 'hub://testingacc2/tmp6189_test_views_test_view_token_only'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
hub_cloud_dev_credentials = ('testingacc2', None)
@pytest.mark.slow
def test_view_token_only(
hub_cloud_path, hub_cloud_dev_token, hub_cloud_dev_credentials
):
ds = deeplake.empty(hub_cloud_path, token=hub_cloud_dev_token)
with ds:
populate(ds)
ds = deeplake.load(hub_cloud_path, token=hub_cloud_dev_token)
view = ds[50:100]
view.save_view(id="50to100")
ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
view = ds[25:100]
view.save_view(id="25to100")
ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
loaded = ds.load_view("50to100")
np.testing.assert_array_equal(loaded.images.numpy(), ds[50:100].images.numpy())
np.testing.assert_array_equal(loaded.labels.numpy(), ds[50:100].labels.numpy())
assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/50to100")
loaded = ds.load_view("25to100")
np.testing.assert_array_equal(loaded.images.numpy(), ds[25:100].images.numpy())
np.testing.assert_array_equal(loaded.labels.numpy(), ds[25:100].labels.numpy())
assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/25to100")
ds.delete_view("25to100")
> deeplake.delete(hub_cloud_path, token=hub_cloud_dev_token)
deeplake/api/tests/test_views.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
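Here too the test body (saving, loading, and deleting views) succeeds; only the final dataset delete fails. Since every failure funnels through `delete_dataset_entry`, a minimal reproduction for the backend team needs nothing but a disposable dataset. A sketch with a placeholder path, assuming the token is supplied via the ACTIVELOOP_TOKEN environment variable:

    import os

    import deeplake

    token = os.environ["ACTIVELOOP_TOKEN"]
    ds = deeplake.empty("hub://<org>/<disposable-dataset>", token=token)
    ds.delete()  # expected: entry removed; observed: HTTP 400,
                 # "object generator can't be used in 'await' expression"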
Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp6189_test_deeplake_vectorstore_test_update_embedding-embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = <function embedding_fn3 at 0x7fd8709c8860>
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
if init_embedding_function is None:
# case 3: errors out when init_embedding_function is not specified
with pytest.raises(ValueError):
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
else:
# case 4
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
0,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
init_embedding_function,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
> vector_store.delete_by_path(path, token=ds.token)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
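`delete_by_path` is a thin wrapper over `deeplake.delete` (deeplake_vectorstore.py:490), so the vector-store failures are the same regression. One hypothetical client-side workaround (`force_delete_vector_store` is not a deeplake API): retry with `force=True` so that, when the control-plane call 400s, the except-branch in `deeplake.delete` clears the raw storage directly. Per the first failure above, this still raises `DatasetHandlerError` when the storage is already empty:

    from typing import Optional

    import deeplake
    from deeplake.util.exceptions import BadRequestException

    def force_delete_vector_store(
        path: str, token: str, creds: Optional[dict] = None
    ) -> None:
        try:
            deeplake.delete(path, large_ok=True, token=token, creds=creds)
        except BadRequestException:
            # assumption: with force=True the failed control-plane call is
            # caught inside deeplake.delete and the raw storage is wiped instead
            deeplake.delete(path, force=True, large_ok=True, token=token, creds=creds)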
Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp6189_test_deeplake_vectorstore_test_update_embedding-None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
if init_embedding_function is None:
# case 3: errors out when init_embedding_function is not specified
with pytest.raises(ValueError):
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
else:
# case 4
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
0,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
init_embedding_function,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
> vector_store.delete_by_path(path, token=ds.token)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
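All eight failures in this run reduce to the one backend regression behind `delete_dataset_entry`. Until the server-side fix lands, a hypothetical shared marker (not currently in the suite) would report the affected tests as XFAIL without hiding the regression, and as XPASS (non-strict) once deletion works again:

    import pytest

    from deeplake.util.exceptions import BadRequestException, DatasetHandlerError

    cloud_delete_xfail = pytest.mark.xfail(
        raises=(BadRequestException, DatasetHandlerError),
        reason="backend: object generator can't be used in 'await' expression",
        strict=False,
    )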