Improved message for GetChunkError #12251
2998 tests run, 1668 passed, 1319 skipped, 11 failed.
Annotations
Check failure on line 1 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
test_deepmemory.test_deepmemory_train_and_cancel
failed on setup with "KeyError: NoSuchKey('An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.')"
Raw output
self = <deeplake.core.storage.s3.S3Provider object at 0x7f3e3accfd30>
path = 'vectordb/testingacc2/tmp65bb_test_deepmemory_test_deepmemory_train_and_cancel/dataset_meta.json'
start_byte = None, end_byte = None
def get_bytes(
self,
path: str,
start_byte: Optional[int] = None,
end_byte: Optional[int] = None,
):
"""Gets the object present at the path within the given byte range.
Args:
path (str): The path relative to the root of the provider.
start_byte (int, optional): If only specific bytes starting from ``start_byte`` are required.
end_byte (int, optional): If only specific bytes up to end_byte are required.
Returns:
bytes: The bytes of the object present at the path within the given byte range.
Raises:
InvalidBytesRequestedError: If ``start_byte`` > ``end_byte`` or ``start_byte`` < 0 or ``end_byte`` < 0.
KeyError: If an object is not found at the path.
S3GetAccessError: Invalid credentials for the object path storage.
S3GetError: Any other error while retrieving the object.
"""
self._check_update_creds()
path = "".join((self.path, path))
try:
> return self._get_bytes(path, start_byte, end_byte)
deeplake/core/storage/s3.py:275:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/storage/s3.py:247: in _get_bytes
resp = self.client.get_object(Bucket=self.bucket, Key=path, Range=range)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/botocore/client.py:553: in _api_call
return self._make_api_call(operation_name, kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <botocore.client.S3 object at 0x7f3d48e762f0>
operation_name = 'GetObject'
api_params = {'Bucket': 'activeloopai-db-engine-staging', 'Key': 'vectordb/testingacc2/tmp65bb_test_deepmemory_test_deepmemory_train_and_cancel/dataset_meta.json', 'Range': ''}
def _make_api_call(self, operation_name, api_params):
operation_model = self._service_model.operation_model(operation_name)
service_name = self._service_model.service_name
history_recorder.record(
'API_CALL',
{
'service': service_name,
'operation': operation_name,
'params': api_params,
},
)
if operation_model.deprecated:
logger.debug(
'Warning: %s.%s() is deprecated', service_name, operation_name
)
request_context = {
'client_region': self.meta.region_name,
'client_config': self.meta.config,
'has_streaming_input': operation_model.has_streaming_input,
'auth_type': operation_model.auth_type,
}
api_params = self._emit_api_params(
api_params=api_params,
operation_model=operation_model,
context=request_context,
)
(
endpoint_url,
additional_headers,
properties,
) = self._resolve_endpoint_ruleset(
operation_model, api_params, request_context
)
if properties:
# Pass arbitrary endpoint info with the Request
# for use during construction.
request_context['endpoint_properties'] = properties
request_dict = self._convert_to_request_dict(
api_params=api_params,
operation_model=operation_model,
endpoint_url=endpoint_url,
context=request_context,
headers=additional_headers,
)
resolve_checksum_context(request_dict, operation_model, api_params)
service_id = self._service_model.service_id.hyphenize()
handler, event_response = self.meta.events.emit_until_response(
'before-call.{service_id}.{operation_name}'.format(
service_id=service_id, operation_name=operation_name
),
model=operation_model,
params=request_dict,
request_signer=self._request_signer,
context=request_context,
)
if event_response is not None:
http, parsed_response = event_response
else:
maybe_compress_request(
self.meta.config, request_dict, operation_model
)
apply_request_checksum(request_dict)
http, parsed_response = self._make_request(
operation_model, request_dict, request_context
)
self.meta.events.emit(
'after-call.{service_id}.{operation_name}'.format(
service_id=service_id, operation_name=operation_name
),
http_response=http,
parsed=parsed_response,
model=operation_model,
context=request_context,
)
if http.status_code >= 300:
error_info = parsed_response.get("Error", {})
error_code = error_info.get("QueryErrorCode") or error_info.get(
"Code"
)
error_class = self.exceptions.from_code(error_code)
> raise error_class(parsed_response, operation_name)
E botocore.errorfactory.NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/botocore/client.py:1009: NoSuchKey
The above exception was the direct cause of the following exception:
request = <SubRequest 'corpus_query_relevances_copy' for <Function test_deepmemory_train_and_cancel>>
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@pytest.fixture
def corpus_query_relevances_copy(request, hub_cloud_dev_token):
if not is_opt_true(request, HUB_CLOUD_OPT):
pytest.skip(f"{HUB_CLOUD_OPT} flag not set")
return
corpus = _get_storage_path(request, HUB_CLOUD)
query_vs = VectorStore(
path=f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_queries",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
queries = query_vs.dataset.text.data()["value"]
relevance = query_vs.dataset.metadata.data()["value"]
relevance = [rel["relevance"] for rel in relevance]
> deeplake.deepcopy(
f"hub://{HUB_CLOUD_DEV_USERNAME}/test-deepmemory10",
corpus,
token=hub_cloud_dev_token,
overwrite=True,
runtime={"tensor_db": True},
)
deeplake/tests/path_fixtures.py:491:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/dataset.py:1314: in deepcopy
if not dataset._allow_delete(cache_chain):
deeplake/api/dataset.py:2130: in _allow_delete
storage[get_dataset_meta_key(commit_id or FIRST_COMMIT_ID)].decode("utf-8")
deeplake/core/storage/lru_cache.py:217: in __getitem__
result = self.next_storage[path]
deeplake/core/storage/s3.py:232: in __getitem__
return self.get_bytes(path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.storage.s3.S3Provider object at 0x7f3e3accfd30>
path = 'vectordb/testingacc2/tmp65bb_test_deepmemory_test_deepmemory_train_and_cancel/dataset_meta.json'
start_byte = None, end_byte = None
def get_bytes(
self,
path: str,
start_byte: Optional[int] = None,
end_byte: Optional[int] = None,
):
"""Gets the object present at the path within the given byte range.
Args:
path (str): The path relative to the root of the provider.
start_byte (int, optional): If only specific bytes starting from ``start_byte`` are required.
end_byte (int, optional): If only specific bytes up to end_byte are required.
Returns:
bytes: The bytes of the object present at the path within the given byte range.
Raises:
InvalidBytesRequestedError: If ``start_byte`` > ``end_byte`` or ``start_byte`` < 0 or ``end_byte`` < 0.
KeyError: If an object is not found at the path.
S3GetAccessError: Invalid credentials for the object path storage.
S3GetError: Any other error while retrieving the object.
"""
self._check_update_creds()
path = "".join((self.path, path))
try:
return self._get_bytes(path, start_byte, end_byte)
except botocore.exceptions.ClientError as err:
if err.response["Error"]["Code"] == "NoSuchKey":
> raise KeyError(err) from err
E KeyError: NoSuchKey('An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.')
deeplake/core/storage/s3.py:278: KeyError
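Note: the KeyError is deliberate error translation. s3.py catches botocore's ClientError and re-raises NoSuchKey as KeyError so a missing object surfaces uniformly through the storage/cache chain; the underlying problem is that no dataset_meta.json exists at the requested key when the fixture's deepcopy probes it. A minimal standalone sketch of the translation pattern (get_object_bytes and its arguments are illustrative, not deeplake API):

import botocore.exceptions

def get_object_bytes(client, bucket: str, key: str) -> bytes:
    # Fetch the whole object; map S3's NoSuchKey onto KeyError while keeping
    # the original ClientError chained as __cause__, as in the traceback above.
    try:
        return client.get_object(Bucket=bucket, Key=key)["Body"].read()
    except botocore.exceptions.ClientError as err:
        if err.response["Error"]["Code"] == "NoSuchKey":
            raise KeyError(key) from err
        raise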
Check failure on line 948 in deeplake/enterprise/test_pytorch.py
test_pytorch.test_pytorch_data_decode
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd1 in position 2: invalid continuation byte
Raw output
local_auth_ds = Dataset(path='./hub_pytest/test_pytorch/test_pytorch_data_decode', tensors=['generic', 'text', 'json', 'list', 'class_label', 'image'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
@requires_libdeeplake
@requires_torch
@pytest.mark.flaky
@pytest.mark.slow
def test_pytorch_data_decode(local_auth_ds, cat_path):
with local_auth_ds as ds:
ds.create_tensor("generic")
for i in range(10):
ds.generic.append(i)
ds.create_tensor("text", htype="text")
for i in range(10):
ds.text.append(f"hello {i}")
ds.create_tensor("json", htype="json")
for i in range(10):
ds.json.append({"x": i})
ds.create_tensor("list", htype="list")
for i in range(10):
ds.list.append([i, i + 1])
ds.create_tensor("class_label", htype="class_label")
animals = [
"cat",
"dog",
"bird",
"fish",
"horse",
"cow",
"pig",
"sheep",
"goat",
"chicken",
]
ds.class_label.extend(animals)
ds.create_tensor("image", htype="image", sample_compression="jpeg")
for i in range(10):
ds.image.append(deeplake.read(cat_path))
decode_method = {tensor: "data" for tensor in list(ds.tensors.keys())}
ptds = (
ds.dataloader()
.transform(identity)
.pytorch(decode_method=decode_method, collate_fn=identity_collate)
)
> for i, batch in enumerate(ptds):
deeplake/enterprise/test_pytorch.py:948:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/enterprise/dataloader.py:881: in __next__
return next(self._iterator)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/loader.py:155: in __next__
return next(self._iterator)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:80: in __next__
return self.get_data()
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:117: in get_data
batch = self._next_data()
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:102: in _next_data
sample[tensor] = bytes_to_text(sample[tensor], "json")
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
buffer = b'\x08\x0b\xd1+;\x7f\x00\x00', htype = 'json'
def bytes_to_text(buffer, htype):
buffer = bytes(buffer)
if htype == "json":
arr = np.empty(1, dtype=object)
> arr[0] = json.loads(bytes.decode(buffer), cls=HubJsonDecoder)
E UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd1 in position 2: invalid continuation byte
deeplake/core/serialize.py:481: UnicodeDecodeError
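Note: the buffer reaching bytes_to_text here is raw binary, not UTF-8 text, so decoding fails before json.loads ever runs. The decode step reproduces standalone with the byte string copied from the traceback:

buffer = b"\x08\x0b\xd1+;\x7f\x00\x00"
try:
    buffer.decode("utf-8")  # 0xd1 opens a 2-byte UTF-8 sequence; '+' (0x2b) cannot continue it
except UnicodeDecodeError as err:
    print(err)  # 'utf-8' codec can't decode byte 0xd1 in position 2: invalid continuation byte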
Check failure on line 750 in deeplake/api/tests/test_api.py
test_api.test_hub_dataset_suffix_bug
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_ds = Dataset(path='hub://testingacc2/tmp75b3_test_api_test_hub_dataset_suffix_bug', tensors=[])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@pytest.mark.slow
def test_hub_dataset_suffix_bug(hub_cloud_ds, hub_cloud_dev_token):
# creating dataset with similar name but some suffix removed from end
ds = deeplake.dataset(hub_cloud_ds.path[:-1], token=hub_cloud_dev_token)
# need to delete because it's a different path (won't be auto cleaned up)
> ds.delete()
deeplake/api/tests/test_api.py:750:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
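Note: this failure and the six below with the identical message (test_dataset_rename x2, test_dataset_deepcopy, test_view_token_only, test_update_embedding x2) all reach the same path, deeplake.delete() -> delete_dataset_entry() -> check_response_status(), with the backend's 400 body carrying the "object generator can't be used in 'await' expression" description. A minimal stand-in showing how such a response becomes this exception text (FakeResponse is a hypothetical stub, not a deeplake type):

class BadRequestException(Exception):
    pass

class FakeResponse:
    status_code = 400

    def json(self):
        return {"description": "Invalid Request. One or more request parameters is incorrect."}

def check_response_status(response) -> None:
    # The 400 branch of the helper quoted in the traceback above.
    if 200 <= response.status_code < 300:
        return
    try:
        message = response.json()["description"]
    except Exception:
        message = " "
    if response.status_code == 400:
        raise BadRequestException(message)

check_response_status(FakeResponse())  # raises BadRequestException(...)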
Check failure on line 976 in deeplake/api/tests/test_api.py
test_api.test_dataset_rename[True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7f041117d440>
path = 'hub://testingacc2/tmp75b3_test_api_test_dataset_rename-True-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = True
@pytest.mark.parametrize(
("ds_generator", "path", "hub_token"),
[
("local_ds_generator", "local_path", "hub_cloud_dev_token"),
pytest.param(
"s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
),
pytest.param(
"gcs_ds_generator",
"gcs_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"azure_ds_generator",
"azure_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"hub_cloud_ds_generator",
"hub_cloud_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
],
indirect=True,
)
@pytest.mark.parametrize("convert_to_pathlib", [True, False])
def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
ds = ds_generator()
ds.create_tensor("abc")
ds.abc.append([1, 2, 3, 4])
new_path = "_".join([path, "renamed"])
ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
with pytest.raises(RenameError):
ds.rename("wrongfolder/new_ds")
if str(ds.path).startswith("hub://"):
with pytest.raises(BadRequestException):
ds.rename(ds.path)
else:
with pytest.raises(PathNotEmptyException):
ds.rename(ds.path)
> ds = deeplake.rename(ds.path, new_path, token=hub_token)
deeplake/api/tests/test_api.py:976:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/dataset.py:842: in rename
deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 976 in deeplake/api/tests/test_api.py
test_api.test_dataset_rename[False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7f031aa72840>
path = 'hub://testingacc2/tmp75b3_test_api_test_dataset_rename-False-hub_cloud_ds_generator-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
convert_to_pathlib = False
@pytest.mark.parametrize(
("ds_generator", "path", "hub_token"),
[
("local_ds_generator", "local_path", "hub_cloud_dev_token"),
pytest.param(
"s3_ds_generator", "s3_path", "hub_cloud_dev_token", marks=pytest.mark.slow
),
pytest.param(
"gcs_ds_generator",
"gcs_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"azure_ds_generator",
"azure_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
"hub_cloud_ds_generator",
"hub_cloud_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
],
indirect=True,
)
@pytest.mark.parametrize("convert_to_pathlib", [True, False])
def test_dataset_rename(ds_generator, path, hub_token, convert_to_pathlib):
ds = ds_generator()
ds.create_tensor("abc")
ds.abc.append([1, 2, 3, 4])
new_path = "_".join([path, "renamed"])
ds.path = convert_string_to_pathlib_if_needed(ds.path, convert_to_pathlib)
new_path = convert_string_to_pathlib_if_needed(new_path, convert_to_pathlib)
with pytest.raises(RenameError):
ds.rename("wrongfolder/new_ds")
if str(ds.path).startswith("hub://"):
with pytest.raises(BadRequestException):
ds.rename(ds.path)
else:
with pytest.raises(PathNotEmptyException):
ds.rename(ds.path)
> ds = deeplake.rename(ds.path, new_path, token=hub_token)
deeplake/api/tests/test_api.py:976:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/dataset.py:842: in rename
deeplake.delete(old_path, token=token, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 1036 in deeplake/api/tests/test_api.py
test_api.test_dataset_deepcopy[True-2-hub_cloud_path-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
path = 'hub://testingacc2/tmp75b3_test_api_test_dataset_deepcopy-True-2-hub_cloud_path-hub_cloud_dev_token-'
hub_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
num_workers = 2, progressbar = True
@pytest.mark.parametrize(
"path,hub_token",
[
["local_path", "hub_cloud_dev_token"],
pytest.param("hub_cloud_path", "hub_cloud_dev_token", marks=pytest.mark.slow),
],
indirect=True,
)
@pytest.mark.parametrize("num_workers", [2])
@pytest.mark.parametrize("progressbar", [True])
def test_dataset_deepcopy(path, hub_token, num_workers, progressbar):
src_path = "_".join((path, "src1"))
dest_path = "_".join((path, "dest1"))
src_ds = deeplake.empty(src_path, overwrite=True, token=hub_token)
with src_ds:
src_ds.info.update(key=0)
src_ds.create_tensor("a", htype="image", sample_compression="png")
src_ds.create_tensor("b", htype="class_label")
src_ds.create_tensor("c")
src_ds.create_tensor("d", dtype=bool)
src_ds.d.info.update(key=1)
src_ds["a"].append(np.ones((28, 28), dtype="uint8"))
src_ds["b"].append(0)
dest_ds = deeplake.deepcopy(
src_path,
dest_path,
token=hub_token,
num_workers=num_workers,
progressbar=progressbar,
)
assert list(dest_ds.tensors) == ["a", "b", "c", "d"]
assert dest_ds.a.meta.htype == "image"
assert dest_ds.a.meta.sample_compression == "png"
assert dest_ds.b.meta.htype == "class_label"
assert dest_ds.c.meta.htype == None
assert dest_ds.d.dtype == bool
assert dest_ds.info.key == 0
assert dest_ds.d.info.key == 1
for tensor in dest_ds.meta.tensors:
assert_array_equal(src_ds[tensor].numpy(), dest_ds[tensor].numpy())
> deeplake.delete(src_path, token=hub_token)
deeplake/api/tests/test_api.py:1036:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 52 in deeplake/api/tests/test_views.py
test_views.test_view_token_only
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
hub_cloud_path = 'hub://testingacc2/tmp75b3_test_views_test_view_token_only'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
hub_cloud_dev_credentials = ('testingacc2', None)
@pytest.mark.slow
def test_view_token_only(
hub_cloud_path, hub_cloud_dev_token, hub_cloud_dev_credentials
):
ds = deeplake.empty(hub_cloud_path, token=hub_cloud_dev_token)
with ds:
populate(ds)
ds = deeplake.load(hub_cloud_path, token=hub_cloud_dev_token)
view = ds[50:100]
view.save_view(id="50to100")
ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
view = ds[25:100]
view.save_view(id="25to100")
ds = deeplake.load(hub_cloud_path, read_only=True, token=hub_cloud_dev_token)
loaded = ds.load_view("50to100")
np.testing.assert_array_equal(loaded.images.numpy(), ds[50:100].images.numpy())
np.testing.assert_array_equal(loaded.labels.numpy(), ds[50:100].labels.numpy())
assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/50to100")
loaded = ds.load_view("25to100")
np.testing.assert_array_equal(loaded.images.numpy(), ds[25:100].images.numpy())
np.testing.assert_array_equal(loaded.labels.numpy(), ds[25:100].labels.numpy())
assert loaded._vds.path == posixpath.join(hub_cloud_path, ".queries/25to100")
ds.delete_view("25to100")
> deeplake.delete(hub_cloud_path, token=hub_cloud_dev_token)
deeplake/api/tests/test_views.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp75b3_test_deeplake_vectorstore_test_update_embedding-embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = <function embedding_fn3 at 0x7f03d2a40900>
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
if init_embedding_function is None:
# case 3: errors out when init_embedding_function is not specified
with pytest.raises(ValueError):
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
else:
# case 4
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
0,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
init_embedding_function,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
> vector_store.delete_by_path(path, token=ds.token)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 1063 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
test_deeplake_vectorstore.test_update_embedding[None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]
deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
object generator can't be used in 'await' expression
Raw output
ds = Dataset(path='hub://testingacc2/tmp75b3_test_deeplake_vectorstore_test_update_embedding-None-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
new_embedding_value,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
embedding_fn,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
# case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
if init_embedding_function is None:
# case 3: errors out when init_embedding_function is not specified
with pytest.raises(ValueError):
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
else:
# case 4
vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_source_tensor=embedding_source_tensor,
)
assert_updated_vector_store(
0,
vector_store,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_query,
init_embedding_function,
embedding_source_tensor,
embedding_tensor,
exec_option,
num_changed_samples=5,
)
> vector_store.delete_by_path(path, token=ds.token)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1063:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:490: in delete_by_path
deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
return func(*args, **kwargs)
deeplake/api/dataset.py:905: in delete
ds.delete(large_ok=large_ok)
deeplake/core/dataset/deeplake_cloud_dataset.py:246: in delete
self.client.delete_dataset_entry(self.org_id, self.ds_name)
deeplake/client/client.py:306: in delete_dataset_entry
self.request(
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [400]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
> raise BadRequestException(message)
E deeplake.util.exceptions.BadRequestException: Invalid Request. One or more request parameters is incorrect.
E object generator can't be used in 'await' expression
deeplake/client/utils.py:56: BadRequestException
Check failure on line 2701 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
test_deeplake_vectorstore.test_read_only
deeplake.util.exceptions.InvalidTokenException: Token is invalid. Make sure the full token string is included and try again.
Raw output
self = <deeplake.client.client.DeepLakeBackendClient object at 0x7f041112c690>
org_id = 'davitbun', ds_name = 'twitter-algorithm', mode = 'w'
db_engine = {'enabled': False}, no_cache = False
def get_dataset_credentials(
self,
org_id: str,
ds_name: str,
mode: Optional[str] = None,
db_engine: Optional[dict] = None,
no_cache: bool = False,
):
"""Retrieves temporary 12 hour credentials for the required dataset from the backend.
Args:
org_id (str): The name of the user/organization to which the dataset belongs.
ds_name (str): The name of the dataset being accessed.
mode (str, optional): The mode in which the user has requested to open the dataset.
If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
db_engine (dict, optional): The database engine args to use for the dataset.
no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
Returns:
tuple: containing full url to dataset, credentials, mode and expiration time respectively.
Raises:
UserNotLoggedInException: When user is not authenticated
InvalidTokenException: If the specified token is invalid
TokenPermissionError: when there are permission or other errors related to token
AgreementNotAcceptedError: when user has not accepted the agreement
NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
"""
import json
db_engine = db_engine or {}
relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
try:
> response = self.request(
"GET",
relative_url,
endpoint=self.endpoint(),
params={
"mode": mode,
"no_cache": no_cache,
"db_engine": json.dumps(db_engine),
},
).json()
deeplake/client/client.py:196:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/client/client.py:148: in request
check_response_status(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = <Response [403]>
def check_response_status(response: requests.Response):
"""Check response status and throw corresponding exception on failure."""
code = response.status_code
if code >= 200 and code < 300:
return
try:
message = response.json()["description"]
except Exception:
message = " "
if code == 400:
raise BadRequestException(message)
elif response.status_code == 401:
raise AuthenticationException
elif response.status_code == 403:
> raise AuthorizationException(message, response=response)
E deeplake.util.exceptions.AuthorizationException: You don't have permission to write to this dataset (davitbun/twitter-algorithm). If you have read permissions try accessing it with read_only=True.
deeplake/client/utils.py:60: AuthorizationException
During handling of the above exception, another exception occurred:
self = <jwt.api_jws.PyJWS object at 0x7f04090d3250>
jwt = b'PUBLIC_TOKEN_______________________________________________________________________________________________________________________________________________________'
def _load(self, jwt: str | bytes) -> tuple[bytes, bytes, dict[str, Any], bytes]:
if isinstance(jwt, str):
jwt = jwt.encode("utf-8")
if not isinstance(jwt, bytes):
raise DecodeError(f"Invalid token type. Token must be a {bytes}")
try:
> signing_input, crypto_segment = jwt.rsplit(b".", 1)
E ValueError: not enough values to unpack (expected 2, got 1)
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jws.py:257: ValueError
The above exception was the direct cause of the following exception:
self = <deeplake.client.client.DeepLakeBackendClient object at 0x7f041112c690>
org_id = 'davitbun', ds_name = 'twitter-algorithm', mode = 'w'
db_engine = {'enabled': False}, no_cache = False
def get_dataset_credentials(
self,
org_id: str,
ds_name: str,
mode: Optional[str] = None,
db_engine: Optional[dict] = None,
no_cache: bool = False,
):
"""Retrieves temporary 12 hour credentials for the required dataset from the backend.
Args:
org_id (str): The name of the user/organization to which the dataset belongs.
ds_name (str): The name of the dataset being accessed.
mode (str, optional): The mode in which the user has requested to open the dataset.
If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
db_engine (dict, optional): The database engine args to use for the dataset.
no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
Returns:
tuple: containing full url to dataset, credentials, mode and expiration time respectively.
Raises:
UserNotLoggedInException: When user is not authenticated
InvalidTokenException: If the specified token is invalid
TokenPermissionError: when there are permission or other errors related to token
AgreementNotAcceptedError: when user has not accepted the agreement
NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
"""
import json
db_engine = db_engine or {}
relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
try:
response = self.request(
"GET",
relative_url,
endpoint=self.endpoint(),
params={
"mode": mode,
"no_cache": no_cache,
"db_engine": json.dumps(db_engine),
},
).json()
except Exception as e:
if isinstance(e, AuthorizationException):
response_data = e.response.json()
code = response_data.get("code")
if code == 1:
agreements = response_data["agreements"]
agreements = [agreement["text"] for agreement in agreements]
raise AgreementNotAcceptedError(agreements) from e
elif code == 2:
raise NotLoggedInAgreementError from e
else:
try:
> jwt.decode(self.token, options={"verify_signature": False})
deeplake/client/client.py:218:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jwt.py:210: in decode
decoded = self.decode_complete(
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jwt.py:151: in decode_complete
decoded = api_jws.decode_complete(
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jws.py:198: in decode_complete
payload, signing_input, header, signature = self._load(jwt)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <jwt.api_jws.PyJWS object at 0x7f04090d3250>
jwt = b'PUBLIC_TOKEN_______________________________________________________________________________________________________________________________________________________'
def _load(self, jwt: str | bytes) -> tuple[bytes, bytes, dict[str, Any], bytes]:
if isinstance(jwt, str):
jwt = jwt.encode("utf-8")
if not isinstance(jwt, bytes):
raise DecodeError(f"Invalid token type. Token must be a {bytes}")
try:
signing_input, crypto_segment = jwt.rsplit(b".", 1)
header_segment, payload_segment = signing_input.split(b".", 1)
except ValueError as err:
> raise DecodeError("Not enough segments") from err
E jwt.exceptions.DecodeError: Not enough segments
/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/site-packages/jwt/api_jws.py:260: DecodeError
During handling of the above exception, another exception occurred:
@pytest.mark.slow
def test_read_only():
> db = VectorStore("hub://davitbun/twitter-algorithm")
deeplake/core/vectorstore/test_deeplake_vectorstore.py:2701:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:120: in __init__
self.dataset_handler = get_dataset_handler(
deeplake/core/vectorstore/dataset_handlers/dataset_handler.py:13: in get_dataset_handler
return ClientSideDH(*args, **kwargs)
deeplake/core/vectorstore/dataset_handlers/client_side_dataset_handler.py:66: in __init__
self.dataset = dataset or dataset_utils.create_or_load_dataset(
deeplake/core/vectorstore/vector_search/dataset/dataset.py:60: in create_or_load_dataset
return create_dataset(
deeplake/core/vectorstore/vector_search/dataset/dataset.py:180: in create_dataset
dataset = deeplake.empty(
deeplake/api/dataset.py:452: in empty
storage, cache_chain = get_storage_and_cache_chain(
deeplake/util/storage.py:242: in get_storage_and_cache_chain
storage = storage_provider_from_path(
deeplake/util/storage.py:66: in storage_provider_from_path
storage = storage_provider_from_hub_path(
deeplake/util/storage.py:162: in storage_provider_from_hub_path
url, final_creds, mode, expiration, repo = get_dataset_credentials(
deeplake/util/storage.py:139: in get_dataset_credentials
url, final_creds, mode, expiration, repo = client.get_dataset_credentials(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.client.client.DeepLakeBackendClient object at 0x7f041112c690>
org_id = 'davitbun', ds_name = 'twitter-algorithm', mode = 'w'
db_engine = {'enabled': False}, no_cache = False
def get_dataset_credentials(
self,
org_id: str,
ds_name: str,
mode: Optional[str] = None,
db_engine: Optional[dict] = None,
no_cache: bool = False,
):
"""Retrieves temporary 12 hour credentials for the required dataset from the backend.
Args:
org_id (str): The name of the user/organization to which the dataset belongs.
ds_name (str): The name of the dataset being accessed.
mode (str, optional): The mode in which the user has requested to open the dataset.
If not provided, the backend will set mode to 'a' if user has write permission, else 'r'.
db_engine (dict, optional): The database engine args to use for the dataset.
no_cache (bool): If True, cached creds are ignored and new creds are returned. Default False.
Returns:
tuple: containing full url to dataset, credentials, mode and expiration time respectively.
Raises:
UserNotLoggedInException: When user is not authenticated
InvalidTokenException: If the specified token is invalid
TokenPermissionError: when there are permission or other errors related to token
AgreementNotAcceptedError: when user has not accepted the agreement
NotLoggedInAgreementError: when user is not authenticated and dataset has agreement which needs to be signed
"""
import json
db_engine = db_engine or {}
relative_url = GET_DATASET_CREDENTIALS_SUFFIX.format(org_id, ds_name)
try:
response = self.request(
"GET",
relative_url,
endpoint=self.endpoint(),
params={
"mode": mode,
"no_cache": no_cache,
"db_engine": json.dumps(db_engine),
},
).json()
except Exception as e:
if isinstance(e, AuthorizationException):
response_data = e.response.json()
code = response_data.get("code")
if code == 1:
agreements = response_data["agreements"]
agreements = [agreement["text"] for agreement in agreements]
raise AgreementNotAcceptedError(agreements) from e
elif code == 2:
raise NotLoggedInAgreementError from e
else:
try:
jwt.decode(self.token, options={"verify_signature": False})
except Exception:
> raise InvalidTokenException
E deeplake.util.exceptions.InvalidTokenException: Token is invalid. Make sure the full token string is included and try again.
deeplake/client/client.py:220: InvalidTokenException
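Note: on a failed credentials request, the client decodes the token (signature verification off) to distinguish a bad token from a permissions problem. The placeholder public token contains no '.' separators, so PyJWT cannot split it into header.payload.signature and raises DecodeError, which deeplake maps to InvalidTokenException. The decode step reproduces standalone with a shortened placeholder:

import jwt  # PyJWT, the same api_jws/api_jwt modules shown in the traceback

placeholder = "PUBLIC_TOKEN" + "_" * 20  # no '.'-separated JWT segments
try:
    jwt.decode(placeholder, options={"verify_signature": False})
except jwt.exceptions.DecodeError as err:
    print(err)  # Not enough segments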
Check failure on line 189 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
test_deepmemory.test_deepmemory_evaluate
AssertionError: assert {'recall@1': ...@3': 0.4, ...} == {'recall@1': ...@3': 0.6, ...}
Omitting 4 identical items, use -vv to show
Differing items:
{'recall@1': 0.2} != {'recall@1': 0.4}
{'recall@3': 0.4} != {'recall@3': 0.6}
Full diff:
{
- 'recall@1': 0.4,
? ^
+ 'recall@1': 0.2,
? ^
'recall@10': 0.6,
'recall@100': 0.9,
- 'recall@3': 0.6,
? ^
+ 'recall@3': 0.4,
? ^
'recall@5': 0.6,
'recall@50': 0.7,
}
Raw output
corpus_query_relevances_copy = ('hub://testingacc2/tmp75b3_test_deepmemory_test_deepmemory_evaluate', ['0-dimensional biomaterials lack inductive pro...5107', 1]], [['32587939', 1]], ...], 'hub://testingacc2/tmp75b3_test_deepmemory_test_deepmemory_evaluate_eval_queries')
questions_embeddings_and_relevances = (array([[-0.01518817, 0.02033963, -0.01228631, ..., -0.00286692,
-0.0079668 , -0.00414979],
[-0.003503...A treatment decreases endoplasmic reticulum stress in response to general endoplasmic reticulum stress markers.', ...])
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
@pytest.mark.slow
@pytest.mark.timeout(600)
@pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
@requires_libdeeplake
def test_deepmemory_evaluate(
corpus_query_relevances_copy,
questions_embeddings_and_relevances,
hub_cloud_dev_token,
):
corpus, _, _, query_path = corpus_query_relevances_copy
(
questions_embeddings,
question_relevances,
queries,
) = questions_embeddings_and_relevances
db = VectorStore(
corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
# when qvs_params is wrong:
with pytest.raises(ValueError):
db.deep_memory.evaluate(
queries=queries,
embedding=questions_embeddings,
relevance=question_relevances,
qvs_params={
"log_queries": True,
"branch_name": "wrong_branch",
},
)
# embedding_function is not provided in the constructor or in the eval method
with pytest.raises(ValueError):
db.deep_memory.evaluate(
queries=queries,
relevance=question_relevances,
qvs_params={
"log_queries": True,
"branch_name": "wrong_branch",
},
)
recall = db.deep_memory.evaluate(
queries=queries,
embedding=questions_embeddings,
relevance=question_relevances,
qvs_params={
"branch": "queries",
},
)
> assert recall["without model"] == {
"recall@1": 0.4,
"recall@3": 0.6,
"recall@5": 0.6,
"recall@10": 0.6,
"recall@50": 0.7,
"recall@100": 0.9,
}
E AssertionError: assert {'recall@1': ...@3': 0.4, ...} == {'recall@1': ...@3': 0.6, ...}
E Omitting 4 identical items, use -vv to show
E Differing items:
E {'recall@1': 0.2} != {'recall@1': 0.4}
E {'recall@3': 0.4} != {'recall@3': 0.6}
E Full diff:
E {
E - 'recall@1': 0.4,
E ? ^
E + 'recall@1': 0.2,
E ? ^
E 'recall@10': 0.6,
E 'recall@100': 0.9,
E - 'recall@3': 0.6,
E ? ^
E + 'recall@3': 0.4,
E ? ^
E 'recall@5': 0.6,
E 'recall@50': 0.7,
E }
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:189: AssertionError
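Note: the assertion pins exact recall values, and this run landed below them at k=1 and k=3 while the other four buckets matched. If evaluate() is expected to fluctuate run to run, a tolerance-based comparison is one option; a hedged sketch, not the current test, with values taken from the diff above and an illustrative abs=0.2 tolerance:

import pytest

observed = {"recall@1": 0.2, "recall@3": 0.4, "recall@5": 0.6,
            "recall@10": 0.6, "recall@50": 0.7, "recall@100": 0.9}
expected = {"recall@1": 0.4, "recall@3": 0.6, "recall@5": 0.6,
            "recall@10": 0.6, "recall@50": 0.7, "recall@100": 0.9}
for k, target in expected.items():
    assert observed[k] == pytest.approx(target, abs=0.2)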